clang-format the src/Pipeline directory

Bug: b/144825072

Change-Id: I869aef91d6318bf6955581e5dad762800bd46296
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39655
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index 1d6d0a3..4ee8375 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -26,16 +26,21 @@
 
 namespace {
 
-enum { X, Y, Z };
+enum
+{
+	X,
+	Y,
+	Z
+};
 
 }  // anonymous namespace
 
 namespace sw {
 
 ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
-	: shader(shader),
-	  pipelineLayout(pipelineLayout),
-	  descriptorSets(descriptorSets)
+    : shader(shader)
+    , pipelineLayout(pipelineLayout)
+    , descriptorSets(descriptorSets)
 {
 }
 
@@ -53,46 +58,41 @@
 	shader->emitEpilog(&routine);
 }
 
-void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3])
+void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3])
 {
-	routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		auto numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
 		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
 		{
 			value[builtin.FirstComponent + component] =
-				As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component)));
+			    As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component)));
 		}
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
 		{
 			value[builtin.FirstComponent + component] =
-				As<SIMD::Float>(SIMD::Int(workgroupID[component]));
+			    As<SIMD::Float>(SIMD::Int(workgroupID[component]));
 		}
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		auto workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
 		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
 		{
 			value[builtin.FirstComponent + component] =
-				As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component)));
+			    As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component)));
 		}
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		auto subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupsPerWorkgroup));
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		auto invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(invocationsPerSubgroup));
@@ -101,7 +101,7 @@
 	routine->setImmutableInputBuiltins(shader);
 }
 
-void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex)
+void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex)
 {
 	Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
 	Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
@@ -114,35 +114,31 @@
 	{
 		SIMD::Int idx = localInvocationIndex;
 		localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
-		idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
+		idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY);  // modulo
 		localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX);
-		idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo
+		idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX);  // modulo
 		localInvocationID[X] = idx;
 	}
 
-	routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex);
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupIndex));
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
 		{
 			value[builtin.FirstComponent + component] =
-				As<SIMD::Float>(localInvocationID[component]);
+			    As<SIMD::Float>(localInvocationID[component]);
 		}
 	});
 
-	routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		SIMD::Int wgID = 0;
 		wgID = Insert(wgID, workgroupID[X], X);
 		wgID = Insert(wgID, workgroupID[Y], Y);
@@ -156,7 +152,7 @@
 	});
 }
 
-void ComputeProgram::emit(SpirvRoutine* routine)
+void ComputeProgram::emit(SpirvRoutine *routine)
 {
 	Pointer<Byte> data = Arg<0>();
 	Int workgroupX = Arg<1>();
@@ -174,7 +170,7 @@
 
 	Int invocationsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerWorkgroup));
 
-	Int workgroupID[3] = {workgroupX, workgroupY, workgroupZ};
+	Int workgroupID[3] = { workgroupX, workgroupY, workgroupZ };
 	setWorkgroupBuiltins(data, routine, workgroupID);
 
 	For(Int i = 0, i < subgroupCount, i++)
@@ -194,11 +190,11 @@
 }
 
 void ComputeProgram::run(
-	vk::DescriptorSet::Bindings const &descriptorSets,
-	vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
-	PushConstantStorage const &pushConstants,
-	uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
-	uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
+    vk::DescriptorSet::Bindings const &descriptorSets,
+    vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
+    PushConstantStorage const &pushConstants,
+    uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+    uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
 {
 	auto &modes = shader->getModes();
 
@@ -231,8 +227,7 @@
 	for(uint32_t batchID = 0; batchID < batchCount && batchID < groupCount; batchID++)
 	{
 		wg.add(1);
-		marl::schedule([=, &data]
-		{
+		marl::schedule([=, &data] {
 			defer(wg.done());
 			std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
 
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 75f0cf4..9b93f4e 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -17,13 +17,15 @@
 
 #include "SpirvShader.hpp"
 
-#include "Reactor/Coroutine.hpp"
 #include "Device/Context.hpp"
+#include "Reactor/Coroutine.hpp"
 #include "Vulkan/VkDescriptorSet.hpp"
 
 #include <functional>
 
-namespace vk { class PipelineLayout; }
+namespace vk {
+class PipelineLayout;
+}
 
 namespace sw {
 
@@ -34,13 +36,13 @@
 
 // ComputeProgram builds a SPIR-V compute shader.
 class ComputeProgram : public Coroutine<SpirvShader::YieldResult(
-		void* data,
-		int32_t workgroupX,
-		int32_t workgroupY,
-		int32_t workgroupZ,
-		void* workgroupMemory,
-		int32_t firstSubgroup,
-		int32_t subgroupCount)>
+                           void *data,
+                           int32_t workgroupX,
+                           int32_t workgroupY,
+                           int32_t workgroupZ,
+                           void *workgroupMemory,
+                           int32_t firstSubgroup,
+                           int32_t subgroupCount)>
 {
 public:
 	ComputeProgram(SpirvShader const *spirvShader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets);
@@ -52,35 +54,35 @@
 
 	// run executes the compute shader routine for all workgroups.
 	void run(
-		vk::DescriptorSet::Bindings const &descriptorSetBindings,
-		vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
-		PushConstantStorage const &pushConstants,
-		uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
-		uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+	    vk::DescriptorSet::Bindings const &descriptorSetBindings,
+	    vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
+	    PushConstantStorage const &pushConstants,
+	    uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+	    uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
 
 protected:
-	void emit(SpirvRoutine* routine);
-	void setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3]);
-	void setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex);
+	void emit(SpirvRoutine *routine);
+	void setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3]);
+	void setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex);
 
 	struct Data
 	{
 		vk::DescriptorSet::Bindings descriptorSets;
 		vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets;
-		uint4 numWorkgroups; // [x, y, z, 0]
-		uint4 workgroupSize; // [x, y, z, 0]
-		uint32_t invocationsPerSubgroup; // SPIR-V: "SubgroupSize"
-		uint32_t subgroupsPerWorkgroup; // SPIR-V: "NumSubgroups"
-		uint32_t invocationsPerWorkgroup; // Total number of invocations per workgroup.
+		uint4 numWorkgroups;               // [x, y, z, 0]
+		uint4 workgroupSize;               // [x, y, z, 0]
+		uint32_t invocationsPerSubgroup;   // SPIR-V: "SubgroupSize"
+		uint32_t subgroupsPerWorkgroup;    // SPIR-V: "NumSubgroups"
+		uint32_t invocationsPerWorkgroup;  // Total number of invocations per workgroup.
 		PushConstantStorage pushConstants;
 		const Constants *constants;
 	};
 
-	SpirvShader const * const shader;
-	vk::PipelineLayout const * const pipelineLayout;
+	SpirvShader const *const shader;
+	vk::PipelineLayout const *const pipelineLayout;
 	const vk::DescriptorSet::Bindings &descriptorSets;
 };
 
 }  // namespace sw
 
-#endif   // sw_ComputeProgram_hpp
+#endif  // sw_ComputeProgram_hpp
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index 6eaf92d..1f98769 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -14,8 +14,8 @@
 
 #include "Constants.hpp"
 
-#include "System/Math.hpp"
 #include "System/Half.hpp"
+#include "System/Math.hpp"
 
 #include <cstring>
 
@@ -25,8 +25,7 @@
 
 Constants::Constants()
 {
-	static const unsigned int transposeBit0[16] =
-	{
+	static const unsigned int transposeBit0[16] = {
 		0x00000000,
 		0x00000001,
 		0x00000010,
@@ -45,8 +44,7 @@
 		0x00001111
 	};
 
-	static const unsigned int transposeBit1[16] =
-	{
+	static const unsigned int transposeBit1[16] = {
 		0x00000000,
 		0x00000002,
 		0x00000020,
@@ -65,8 +63,7 @@
 		0x00002222
 	};
 
-	static const unsigned int transposeBit2[16] =
-	{
+	static const unsigned int transposeBit2[16] = {
 		0x00000000,
 		0x00000004,
 		0x00000040,
@@ -89,74 +86,71 @@
 	memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1));
 	memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2));
 
-	static const ushort4 cWeight[17] =
-	{
-		{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF},   // 0xFFFF / 1  = 0xFFFF
-		{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF},   // 0xFFFF / 1  = 0xFFFF
-		{0x8000, 0x8000, 0x8000, 0x8000},   // 0xFFFF / 2  = 0x8000
-		{0x5555, 0x5555, 0x5555, 0x5555},   // 0xFFFF / 3  = 0x5555
-		{0x4000, 0x4000, 0x4000, 0x4000},   // 0xFFFF / 4  = 0x4000
-		{0x3333, 0x3333, 0x3333, 0x3333},   // 0xFFFF / 5  = 0x3333
-		{0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA},   // 0xFFFF / 6  = 0x2AAA
-		{0x2492, 0x2492, 0x2492, 0x2492},   // 0xFFFF / 7  = 0x2492
-		{0x2000, 0x2000, 0x2000, 0x2000},   // 0xFFFF / 8  = 0x2000
-		{0x1C71, 0x1C71, 0x1C71, 0x1C71},   // 0xFFFF / 9  = 0x1C71
-		{0x1999, 0x1999, 0x1999, 0x1999},   // 0xFFFF / 10 = 0x1999
-		{0x1745, 0x1745, 0x1745, 0x1745},   // 0xFFFF / 11 = 0x1745
-		{0x1555, 0x1555, 0x1555, 0x1555},   // 0xFFFF / 12 = 0x1555
-		{0x13B1, 0x13B1, 0x13B1, 0x13B1},   // 0xFFFF / 13 = 0x13B1
-		{0x1249, 0x1249, 0x1249, 0x1249},   // 0xFFFF / 14 = 0x1249
-		{0x1111, 0x1111, 0x1111, 0x1111},   // 0xFFFF / 15 = 0x1111
-		{0x1000, 0x1000, 0x1000, 0x1000},   // 0xFFFF / 16 = 0x1000
+	static const ushort4 cWeight[17] = {
+		{ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF },  // 0xFFFF / 1  = 0xFFFF
+		{ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF },  // 0xFFFF / 1  = 0xFFFF
+		{ 0x8000, 0x8000, 0x8000, 0x8000 },  // 0xFFFF / 2  = 0x8000
+		{ 0x5555, 0x5555, 0x5555, 0x5555 },  // 0xFFFF / 3  = 0x5555
+		{ 0x4000, 0x4000, 0x4000, 0x4000 },  // 0xFFFF / 4  = 0x4000
+		{ 0x3333, 0x3333, 0x3333, 0x3333 },  // 0xFFFF / 5  = 0x3333
+		{ 0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA },  // 0xFFFF / 6  = 0x2AAA
+		{ 0x2492, 0x2492, 0x2492, 0x2492 },  // 0xFFFF / 7  = 0x2492
+		{ 0x2000, 0x2000, 0x2000, 0x2000 },  // 0xFFFF / 8  = 0x2000
+		{ 0x1C71, 0x1C71, 0x1C71, 0x1C71 },  // 0xFFFF / 9  = 0x1C71
+		{ 0x1999, 0x1999, 0x1999, 0x1999 },  // 0xFFFF / 10 = 0x1999
+		{ 0x1745, 0x1745, 0x1745, 0x1745 },  // 0xFFFF / 11 = 0x1745
+		{ 0x1555, 0x1555, 0x1555, 0x1555 },  // 0xFFFF / 12 = 0x1555
+		{ 0x13B1, 0x13B1, 0x13B1, 0x13B1 },  // 0xFFFF / 13 = 0x13B1
+		{ 0x1249, 0x1249, 0x1249, 0x1249 },  // 0xFFFF / 14 = 0x1249
+		{ 0x1111, 0x1111, 0x1111, 0x1111 },  // 0xFFFF / 15 = 0x1111
+		{ 0x1000, 0x1000, 0x1000, 0x1000 },  // 0xFFFF / 16 = 0x1000
 	};
 
-	static const float4 uvWeight[17] =
-	{
-		{1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f},
-		{1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f,  1.0f / 1.0f},
-		{1.0f / 2.0f,  1.0f / 2.0f,  1.0f / 2.0f,  1.0f / 2.0f},
-		{1.0f / 3.0f,  1.0f / 3.0f,  1.0f / 3.0f,  1.0f / 3.0f},
-		{1.0f / 4.0f,  1.0f / 4.0f,  1.0f / 4.0f,  1.0f / 4.0f},
-		{1.0f / 5.0f,  1.0f / 5.0f,  1.0f / 5.0f,  1.0f / 5.0f},
-		{1.0f / 6.0f,  1.0f / 6.0f,  1.0f / 6.0f,  1.0f / 6.0f},
-		{1.0f / 7.0f,  1.0f / 7.0f,  1.0f / 7.0f,  1.0f / 7.0f},
-		{1.0f / 8.0f,  1.0f / 8.0f,  1.0f / 8.0f,  1.0f / 8.0f},
-		{1.0f / 9.0f,  1.0f / 9.0f,  1.0f / 9.0f,  1.0f / 9.0f},
-		{1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f},
-		{1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f},
-		{1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f},
-		{1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f},
-		{1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f},
-		{1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f},
-		{1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f},
+	static const float4 uvWeight[17] = {
+		{ 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f },
+		{ 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f },
+		{ 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f },
+		{ 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f },
+		{ 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f },
+		{ 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f },
+		{ 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f },
+		{ 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f },
+		{ 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f },
+		{ 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f },
+		{ 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f },
+		{ 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f },
+		{ 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f },
+		{ 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f },
+		{ 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f },
+		{ 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f },
+		{ 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f },
 	};
 
-	static const float4 uvStart[17] =
-	{
-		{-0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f},
-		{-0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f,   -0.0f / 2.0f},
-		{-1.0f / 4.0f,   -1.0f / 4.0f,   -1.0f / 4.0f,   -1.0f / 4.0f},
-		{-2.0f / 6.0f,   -2.0f / 6.0f,   -2.0f / 6.0f,   -2.0f / 6.0f},
-		{-3.0f / 8.0f,   -3.0f / 8.0f,   -3.0f / 8.0f,   -3.0f / 8.0f},
-		{-4.0f / 10.0f,  -4.0f / 10.0f,  -4.0f / 10.0f,  -4.0f / 10.0f},
-		{-5.0f / 12.0f,  -5.0f / 12.0f,  -5.0f / 12.0f,  -5.0f / 12.0f},
-		{-6.0f / 14.0f,  -6.0f / 14.0f,  -6.0f / 14.0f,  -6.0f / 14.0f},
-		{-7.0f / 16.0f,  -7.0f / 16.0f,  -7.0f / 16.0f,  -7.0f / 16.0f},
-		{-8.0f / 18.0f,  -8.0f / 18.0f,  -8.0f / 18.0f,  -8.0f / 18.0f},
-		{-9.0f / 20.0f,  -9.0f / 20.0f,  -9.0f / 20.0f,  -9.0f / 20.0f},
-		{-10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f},
-		{-11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f},
-		{-12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f},
-		{-13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f},
-		{-14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f},
-		{-15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f},
+	static const float4 uvStart[17] = {
+		{ -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f },
+		{ -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f },
+		{ -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f },
+		{ -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f },
+		{ -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f },
+		{ -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f },
+		{ -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f },
+		{ -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f },
+		{ -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f },
+		{ -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f },
+		{ -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f },
+		{ -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f },
+		{ -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f },
+		{ -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f },
+		{ -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f },
+		{ -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f },
+		{ -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f },
 	};
 
 	memcpy(&this->cWeight, cWeight, sizeof(cWeight));
 	memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight));
 	memcpy(&this->uvStart, uvStart, sizeof(uvStart));
 
-	static const unsigned int occlusionCount[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
+	static const unsigned int occlusionCount[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
 
 	memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount));
 
@@ -253,24 +247,24 @@
 
 	for(int i = 0; i < 4; i++)
 	{
-		maskW01Q[i][0] =  -(i >> 0 & 1);
-		maskW01Q[i][1] =  -(i >> 1 & 1);
-		maskW01Q[i][2] =  -(i >> 0 & 1);
-		maskW01Q[i][3] =  -(i >> 1 & 1);
+		maskW01Q[i][0] = -(i >> 0 & 1);
+		maskW01Q[i][1] = -(i >> 1 & 1);
+		maskW01Q[i][2] = -(i >> 0 & 1);
+		maskW01Q[i][3] = -(i >> 1 & 1);
 
-		maskD01X[i][0] =  -(i >> 0 & 1);
-		maskD01X[i][1] =  -(i >> 1 & 1);
-		maskD01X[i][2] =  -(i >> 0 & 1);
-		maskD01X[i][3] =  -(i >> 1 & 1);
+		maskD01X[i][0] = -(i >> 0 & 1);
+		maskD01X[i][1] = -(i >> 1 & 1);
+		maskD01X[i][2] = -(i >> 0 & 1);
+		maskD01X[i][3] = -(i >> 1 & 1);
 	}
 
 	for(int i = 0; i < 16; i++)
 	{
 		mask10Q[i][0] = mask10Q[i][1] =
-				(i & 0x1 ? 0x3FF : 0) |
-				(i & 0x2 ? 0xFFC00 : 0) |
-				(i & 0x4 ? 0x3FF00000 : 0) |
-				(i & 0x8 ? 0xC0000000 : 0);
+		    (i & 0x1 ? 0x3FF : 0) |
+		    (i & 0x2 ? 0xFFC00 : 0) |
+		    (i & 0x4 ? 0x3FF00000 : 0) |
+		    (i & 0x8 ? 0xC0000000 : 0);
 	}
 
 	for(int i = 0; i < 256; i++)
@@ -327,13 +321,13 @@
 	memcpy(&this->X, &X, sizeof(X));
 	memcpy(&this->Y, &Y, sizeof(Y));
 
-	const dword maxX[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101};
-	const dword maxY[16] = {0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202};
-	const dword maxZ[16] = {0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404};
-	const dword minX[16] = {0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808};
-	const dword minY[16] = {0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010};
-	const dword minZ[16] = {0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020};
-	const dword fini[16] = {0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080};
+	const dword maxX[16] = { 0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101 };
+	const dword maxY[16] = { 0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202 };
+	const dword maxZ[16] = { 0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404 };
+	const dword minX[16] = { 0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808 };
+	const dword minY[16] = { 0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010 };
+	const dword minZ[16] = { 0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020 };
+	const dword fini[16] = { 0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080 };
 
 	memcpy(&this->maxX, &maxX, sizeof(maxX));
 	memcpy(&this->maxY, &maxY, sizeof(maxY));
@@ -343,17 +337,17 @@
 	memcpy(&this->minZ, &minZ, sizeof(minZ));
 	memcpy(&this->fini, &fini, sizeof(fini));
 
-	static const dword4 maxPos = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE};
+	static const dword4 maxPos = { 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE };
 
 	memcpy(&this->maxPos, &maxPos, sizeof(maxPos));
 
-	static const float4 unscaleByte = {1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF};
-	static const float4 unscaleSByte = {1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F};
-	static const float4 unscaleShort = {1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF};
-	static const float4 unscaleUShort = {1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF};
-	static const float4 unscaleInt = {1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF};
-	static const float4 unscaleUInt = {1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF};
-	static const float4 unscaleFixed = {1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000};
+	static const float4 unscaleByte = { 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF };
+	static const float4 unscaleSByte = { 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F };
+	static const float4 unscaleShort = { 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF };
+	static const float4 unscaleUShort = { 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF };
+	static const float4 unscaleInt = { 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF };
+	static const float4 unscaleUInt = { 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF };
+	static const float4 unscaleFixed = { 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000 };
 
 	memcpy(&this->unscaleByte, &unscaleByte, sizeof(unscaleByte));
 	memcpy(&this->unscaleSByte, &unscaleSByte, sizeof(unscaleSByte));
@@ -365,7 +359,7 @@
 
 	for(int i = 0; i <= 0xFFFF; i++)
 	{
-		half2float[i] = (float)reinterpret_cast<half&>(i);
+		half2float[i] = (float)reinterpret_cast<half &>(i);
 	}
 }
 
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index 484682b..c9c8a1f 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -15,8 +15,8 @@
 #ifndef sw_Constants_hpp
 #define sw_Constants_hpp
 
-#include "System/Types.hpp"
 #include "System/Math.hpp"
+#include "System/Types.hpp"
 #include "Vulkan/VkConfig.h"
 
 namespace sw {
@@ -68,8 +68,8 @@
 	word4 maskW01Q[4];
 	dword4 maskD01X[4];
 	word4 mask565Q[8];
-	dword2 mask10Q[16];		// 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
-	word4 mask5551Q[16];	// 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
+	dword2 mask10Q[16];   // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
+	word4 mask5551Q[16];  // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
 
 	unsigned short sRGBtoLinear8_16[256];
 
@@ -91,10 +91,10 @@
 	// VK_SAMPLE_COUNT_4_BIT
 	// https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling
 	static constexpr float VkSampleLocations4[][2] = {
-		{0.375, 0.125},
-		{0.875, 0.375},
-		{0.125, 0.625},
-		{0.625, 0.875},
+		{ 0.375, 0.125 },
+		{ 0.875, 0.375 },
+		{ 0.125, 0.625 },
+		{ 0.625, 0.875 },
 	};
 
 	// Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down.
@@ -141,6 +141,6 @@
 
 extern Constants constants;
 
-}  // namepsace sw
+}  // namespace sw
 
-#endif   // sw_Constants_hpp
+#endif  // sw_Constants_hpp
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 332f82e..21296ed 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -56,48 +56,43 @@
 	return mask;
 }
 
-void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
+void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4])
 {
 	routine.setImmutableInputBuiltins(spirvShader);
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		assert(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID)))));
 	});
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		assert(builtin.SizeInComponents == 4);
-		value[builtin.FirstComponent+0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f);
-		value[builtin.FirstComponent+1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f);
-		value[builtin.FirstComponent+2] = z[0];	// sample 0
-		value[builtin.FirstComponent+3] = w;
+		value[builtin.FirstComponent + 0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f);
+		value[builtin.FirstComponent + 1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f);
+		value[builtin.FirstComponent + 2] = z[0];  // sample 0
+		value[builtin.FirstComponent + 3] = w;
 	});
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		assert(builtin.SizeInComponents == 2);
-		value[builtin.FirstComponent+0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) +
-			SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX))));
-		value[builtin.FirstComponent+1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) +
-			SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY))));
+		value[builtin.FirstComponent + 0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) +
+		                                    SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX))));
+		value[builtin.FirstComponent + 1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) +
+		                                    SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY))));
 	});
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		assert(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
 	});
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		assert(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask));
 	});
 
-	routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1);
-	routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1);
+	routine.windowSpacePosition[0] = x + SIMD::Int(0, 1, 0, 1);
+	routine.windowSpacePosition[1] = y + SIMD::Int(0, 0, 1, 1);
 	routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
 }
 
@@ -169,7 +164,7 @@
 
 		for(auto i = 0u; i < state.multiSample; i++)
 		{
-			cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1<<i), SIMD::Int(0)));
+			cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << i), SIMD::Int(0)));
 		}
 	}
 
@@ -211,78 +206,78 @@
 		auto format = state.targetFormat[index];
 		switch(format)
 		{
-		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-		case VK_FORMAT_R5G6B5_UNORM_PACK16:
-		case VK_FORMAT_B8G8R8A8_UNORM:
-		case VK_FORMAT_B8G8R8A8_SRGB:
-		case VK_FORMAT_R8G8B8A8_UNORM:
-		case VK_FORMAT_R8G8B8A8_SRGB:
-		case VK_FORMAT_R8G8_UNORM:
-		case VK_FORMAT_R8_UNORM:
-		case VK_FORMAT_R16G16_UNORM:
-		case VK_FORMAT_R16G16B16A16_UNORM:
-		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
-		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-			for(unsigned int q = 0; q < state.multiSample; q++)
-			{
-				if(state.multiSampleMask & (1 << q))
+			case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+			case VK_FORMAT_R5G6B5_UNORM_PACK16:
+			case VK_FORMAT_B8G8R8A8_UNORM:
+			case VK_FORMAT_B8G8R8A8_SRGB:
+			case VK_FORMAT_R8G8B8A8_UNORM:
+			case VK_FORMAT_R8G8B8A8_SRGB:
+			case VK_FORMAT_R8G8_UNORM:
+			case VK_FORMAT_R8_UNORM:
+			case VK_FORMAT_R16G16_UNORM:
+			case VK_FORMAT_R16G16B16A16_UNORM:
+			case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+			case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+				for(unsigned int q = 0; q < state.multiSample; q++)
 				{
-					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
-					Vector4s color;
+					if(state.multiSampleMask & (1 << q))
+					{
+						Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+						Vector4s color;
 
-					color.x = convertFixed16(c[index].x, false);
-					color.y = convertFixed16(c[index].y, false);
-					color.z = convertFixed16(c[index].z, false);
-					color.w = convertFixed16(c[index].w, false);
+						color.x = convertFixed16(c[index].x, false);
+						color.y = convertFixed16(c[index].y, false);
+						color.z = convertFixed16(c[index].z, false);
+						color.w = convertFixed16(c[index].w, false);
 
-					alphaBlend(index, buffer, color, x);
-					writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+						alphaBlend(index, buffer, color, x);
+						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+					}
 				}
-			}
-			break;
-		case VK_FORMAT_R16_SFLOAT:
-		case VK_FORMAT_R16G16_SFLOAT:
-		case VK_FORMAT_R16G16B16A16_SFLOAT:
-		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
-		case VK_FORMAT_R32_SFLOAT:
-		case VK_FORMAT_R32G32_SFLOAT:
-		case VK_FORMAT_R32G32B32A32_SFLOAT:
-		case VK_FORMAT_R32_SINT:
-		case VK_FORMAT_R32G32_SINT:
-		case VK_FORMAT_R32G32B32A32_SINT:
-		case VK_FORMAT_R32_UINT:
-		case VK_FORMAT_R32G32_UINT:
-		case VK_FORMAT_R32G32B32A32_UINT:
-		case VK_FORMAT_R16_SINT:
-		case VK_FORMAT_R16G16_SINT:
-		case VK_FORMAT_R16G16B16A16_SINT:
-		case VK_FORMAT_R16_UINT:
-		case VK_FORMAT_R16G16_UINT:
-		case VK_FORMAT_R16G16B16A16_UINT:
-		case VK_FORMAT_R8_SINT:
-		case VK_FORMAT_R8G8_SINT:
-		case VK_FORMAT_R8G8B8A8_SINT:
-		case VK_FORMAT_R8_UINT:
-		case VK_FORMAT_R8G8_UINT:
-		case VK_FORMAT_R8G8B8A8_UINT:
-		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
-			for(unsigned int q = 0; q < state.multiSample; q++)
-			{
-				if(state.multiSampleMask & (1 << q))
+				break;
+			case VK_FORMAT_R16_SFLOAT:
+			case VK_FORMAT_R16G16_SFLOAT:
+			case VK_FORMAT_R16G16B16A16_SFLOAT:
+			case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+			case VK_FORMAT_R32_SFLOAT:
+			case VK_FORMAT_R32G32_SFLOAT:
+			case VK_FORMAT_R32G32B32A32_SFLOAT:
+			case VK_FORMAT_R32_SINT:
+			case VK_FORMAT_R32G32_SINT:
+			case VK_FORMAT_R32G32B32A32_SINT:
+			case VK_FORMAT_R32_UINT:
+			case VK_FORMAT_R32G32_UINT:
+			case VK_FORMAT_R32G32B32A32_UINT:
+			case VK_FORMAT_R16_SINT:
+			case VK_FORMAT_R16G16_SINT:
+			case VK_FORMAT_R16G16B16A16_SINT:
+			case VK_FORMAT_R16_UINT:
+			case VK_FORMAT_R16G16_UINT:
+			case VK_FORMAT_R16G16B16A16_UINT:
+			case VK_FORMAT_R8_SINT:
+			case VK_FORMAT_R8G8_SINT:
+			case VK_FORMAT_R8G8B8A8_SINT:
+			case VK_FORMAT_R8_UINT:
+			case VK_FORMAT_R8G8_UINT:
+			case VK_FORMAT_R8G8B8A8_UINT:
+			case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+			case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+				for(unsigned int q = 0; q < state.multiSample; q++)
 				{
-					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
-					Vector4f color = c[index];
+					if(state.multiSampleMask & (1 << q))
+					{
+						Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+						Vector4f color = c[index];
 
-					alphaBlend(index, buffer, color, x);
-					writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+						alphaBlend(index, buffer, color, x);
+						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+					}
 				}
-			}
-			break;
-		default:
-			UNIMPLEMENTED("VkFormat: %d", int(format));
+				break;
+			default:
+				UNIMPLEMENTED("VkFormat: %d", int(format));
 		}
 	}
 }
@@ -298,62 +293,66 @@
 
 		switch(state.targetFormat[index])
 		{
-		case VK_FORMAT_UNDEFINED:
-			break;
-		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-		case VK_FORMAT_R5G6B5_UNORM_PACK16:
-		case VK_FORMAT_B8G8R8A8_UNORM:
-		case VK_FORMAT_B8G8R8A8_SRGB:
-		case VK_FORMAT_R8G8B8A8_UNORM:
-		case VK_FORMAT_R8G8B8A8_SRGB:
-		case VK_FORMAT_R8G8_UNORM:
-		case VK_FORMAT_R8_UNORM:
-		case VK_FORMAT_R16G16_UNORM:
-		case VK_FORMAT_R16G16B16A16_UNORM:
-		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
-		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-			oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
-			oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
-			oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
-			oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
-			break;
-		case VK_FORMAT_R32_SFLOAT:
-		case VK_FORMAT_R32G32_SFLOAT:
-		case VK_FORMAT_R32G32B32A32_SFLOAT:
-		case VK_FORMAT_R32_SINT:
-		case VK_FORMAT_R32G32_SINT:
-		case VK_FORMAT_R32G32B32A32_SINT:
-		case VK_FORMAT_R32_UINT:
-		case VK_FORMAT_R32G32_UINT:
-		case VK_FORMAT_R32G32B32A32_UINT:
-		case VK_FORMAT_R16_SFLOAT:
-		case VK_FORMAT_R16G16_SFLOAT:
-		case VK_FORMAT_R16G16B16A16_SFLOAT:
-		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
-		case VK_FORMAT_R16_SINT:
-		case VK_FORMAT_R16G16_SINT:
-		case VK_FORMAT_R16G16B16A16_SINT:
-		case VK_FORMAT_R16_UINT:
-		case VK_FORMAT_R16G16_UINT:
-		case VK_FORMAT_R16G16B16A16_UINT:
-		case VK_FORMAT_R8_SINT:
-		case VK_FORMAT_R8G8_SINT:
-		case VK_FORMAT_R8G8B8A8_SINT:
-		case VK_FORMAT_R8_UINT:
-		case VK_FORMAT_R8G8_UINT:
-		case VK_FORMAT_R8G8B8A8_UINT:
-		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
-			break;
-		default:
-			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+			case VK_FORMAT_UNDEFINED:
+				break;
+			case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+			case VK_FORMAT_R5G6B5_UNORM_PACK16:
+			case VK_FORMAT_B8G8R8A8_UNORM:
+			case VK_FORMAT_B8G8R8A8_SRGB:
+			case VK_FORMAT_R8G8B8A8_UNORM:
+			case VK_FORMAT_R8G8B8A8_SRGB:
+			case VK_FORMAT_R8G8_UNORM:
+			case VK_FORMAT_R8_UNORM:
+			case VK_FORMAT_R16G16_UNORM:
+			case VK_FORMAT_R16G16B16A16_UNORM:
+			case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+			case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+				oC[index].x = Max(oC[index].x, Float4(0.0f));
+				oC[index].x = Min(oC[index].x, Float4(1.0f));
+				oC[index].y = Max(oC[index].y, Float4(0.0f));
+				oC[index].y = Min(oC[index].y, Float4(1.0f));
+				oC[index].z = Max(oC[index].z, Float4(0.0f));
+				oC[index].z = Min(oC[index].z, Float4(1.0f));
+				oC[index].w = Max(oC[index].w, Float4(0.0f));
+				oC[index].w = Min(oC[index].w, Float4(1.0f));
+				break;
+			case VK_FORMAT_R32_SFLOAT:
+			case VK_FORMAT_R32G32_SFLOAT:
+			case VK_FORMAT_R32G32B32A32_SFLOAT:
+			case VK_FORMAT_R32_SINT:
+			case VK_FORMAT_R32G32_SINT:
+			case VK_FORMAT_R32G32B32A32_SINT:
+			case VK_FORMAT_R32_UINT:
+			case VK_FORMAT_R32G32_UINT:
+			case VK_FORMAT_R32G32B32A32_UINT:
+			case VK_FORMAT_R16_SFLOAT:
+			case VK_FORMAT_R16G16_SFLOAT:
+			case VK_FORMAT_R16G16B16A16_SFLOAT:
+			case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+			case VK_FORMAT_R16_SINT:
+			case VK_FORMAT_R16G16_SINT:
+			case VK_FORMAT_R16G16B16A16_SINT:
+			case VK_FORMAT_R16_UINT:
+			case VK_FORMAT_R16G16_UINT:
+			case VK_FORMAT_R16G16B16A16_UINT:
+			case VK_FORMAT_R8_SINT:
+			case VK_FORMAT_R8G8_SINT:
+			case VK_FORMAT_R8G8B8A8_SINT:
+			case VK_FORMAT_R8_UINT:
+			case VK_FORMAT_R8G8_UINT:
+			case VK_FORMAT_R8G8B8A8_UINT:
+			case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+			case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+				break;
+			default:
+				UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 		}
 	}
 }
 
-Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
+Float4 PixelProgram::linearToSRGB(const Float4 &x)  // Approximates x^(1.0/2.2)
 {
 	Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
 	Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
@@ -361,4 +360,4 @@
 	return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
 }
 
-}  // namepsace sw
+}  // namespace sw
diff --git a/src/Pipeline/PixelProgram.hpp b/src/Pipeline/PixelProgram.hpp
index 7888115..59994fb 100644
--- a/src/Pipeline/PixelProgram.hpp
+++ b/src/Pipeline/PixelProgram.hpp
@@ -23,18 +23,18 @@
 {
 public:
 	PixelProgram(
-			const PixelProcessor::State &state,
-			vk::PipelineLayout const *pipelineLayout,
-			SpirvShader const *spirvShader,
-			const vk::DescriptorSet::Bindings &descriptorSets) :
-		PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
+	    const PixelProcessor::State &state,
+	    vk::PipelineLayout const *pipelineLayout,
+	    SpirvShader const *spirvShader,
+	    const vk::DescriptorSet::Bindings &descriptorSets)
+	    : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
 	{
 	}
 
 	virtual ~PixelProgram() {}
 
 protected:
-	virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]);
+	virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]);
 	virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]);
 	virtual Bool alphaTest(Int cMask[4]);
 	virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 6b06146..b2ae9d0 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -14,24 +14,24 @@
 
 #include "PixelRoutine.hpp"
 
-#include "SamplerCore.hpp"
 #include "Constants.hpp"
-#include "Device/Renderer.hpp"
-#include "Device/QuadRasterizer.hpp"
+#include "SamplerCore.hpp"
 #include "Device/Primitive.hpp"
+#include "Device/QuadRasterizer.hpp"
+#include "Device/Renderer.hpp"
 #include "Vulkan/VkDebug.hpp"
 #include "Vulkan/VkPipelineLayout.hpp"
 
 namespace sw {
 
 PixelRoutine::PixelRoutine(
-		const PixelProcessor::State &state,
-		vk::PipelineLayout const *pipelineLayout,
-		SpirvShader const *spirvShader,
-		const vk::DescriptorSet::Bindings &descriptorSets)
-	: QuadRasterizer(state, spirvShader),
-	  routine(pipelineLayout),
-	  descriptorSets(descriptorSets)
+    const PixelProcessor::State &state,
+    vk::PipelineLayout const *pipelineLayout,
+    SpirvShader const *spirvShader,
+    const vk::DescriptorSet::Bindings &descriptorSets)
+    : QuadRasterizer(state, spirvShader)
+    , routine(pipelineLayout)
+    , descriptorSets(descriptorSets)
 {
 	if(spirvShader)
 	{
@@ -55,8 +55,8 @@
 	// TODO: consider shader which modifies sample mask in general
 	const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
 
-	Int zMask[4];   // Depth mask
-	Int sMask[4];   // Stencil mask
+	Int zMask[4];  // Depth mask
+	Int sMask[4];  // Stencil mask
 
 	for(unsigned int q = 0; q < state.multiSample; q++)
 	{
@@ -72,7 +72,7 @@
 	Float4 f;
 	Float4 rhwCentroid;
 
-	Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
+	Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
 
 	if(interpolateZ())
 	{
@@ -82,10 +82,10 @@
 
 			if(state.multiSample > 1)
 			{
-				x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
+				x -= *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4));
 			}
 
-			z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
+			z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false, state.depthClamp);
 		}
 	}
 
@@ -101,7 +101,7 @@
 
 	If(depthPass || Bool(!earlyDepthTest))
 	{
-		Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
+		Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
 
 		// Centroid locations
 		Float4 XXXX = Float4(0.0f);
@@ -113,9 +113,9 @@
 
 			for(unsigned int q = 0; q < state.multiSample; q++)
 			{
-				XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
-				YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
-				WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
+				XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]);
+				YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
+				WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
 			}
 
 			WWWW = Rcp_pp(WWWW);
@@ -128,12 +128,12 @@
 
 		if(interpolateW())
 		{
-			w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
+			w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false, false);
 			rhw = reciprocal(w, false, false, true);
 
 			if(state.centroid)
 			{
-				rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
+				rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false));
 			}
 		}
 
@@ -147,16 +147,16 @@
 					if(input.Centroid && state.multiSample > 1)
 					{
 						routine.inputs[interpolant] =
-								interpolateCentroid(XXXX, YYYY, rhwCentroid,
-													primitive + OFFSET(Primitive, V[interpolant]),
-													input.Flat, !input.NoPerspective);
+						    interpolateCentroid(XXXX, YYYY, rhwCentroid,
+						                        primitive + OFFSET(Primitive, V[interpolant]),
+						                        input.Flat, !input.NoPerspective);
 					}
 					else
 					{
 						routine.inputs[interpolant] =
-								interpolate(xxxx, Dv[interpolant], rhw,
-											primitive + OFFSET(Primitive, V[interpolant]),
-											input.Flat, !input.NoPerspective, false);
+						    interpolate(xxxx, Dv[interpolant], rhw,
+						                primitive + OFFSET(Primitive, V[interpolant]),
+						                input.Flat, !input.NoPerspective, false);
 					}
 				}
 			}
@@ -166,8 +166,8 @@
 			for(uint32_t i = 0; i < state.numClipDistances; i++)
 			{
 				auto distance = interpolate(xxxx, DclipDistance[i], rhw,
-											primitive + OFFSET(Primitive, clipDistance[i]),
-											false, true, false);
+				                            primitive + OFFSET(Primitive, clipDistance[i]),
+				                            false, true, false);
 
 				auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0)));
 				for(auto ms = 0u; ms < state.multiSample; ms++)
@@ -202,9 +202,9 @@
 						if(i < it->second.SizeInComponents)
 						{
 							routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
-									interpolate(xxxx, DcullDistance[i], rhw,
-												primitive + OFFSET(Primitive, cullDistance[i]),
-												false, true, false);
+							    interpolate(xxxx, DcullDistance[i], rhw,
+							                primitive + OFFSET(Primitive, cullDistance[i]),
+							                false, true, false);
 						}
 					}
 				}
@@ -250,7 +250,7 @@
 
 						if(state.occlusionEnabled)
 						{
-							occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
+							occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q]));
 						}
 					}
 				}
@@ -271,12 +271,12 @@
 
 Float4 PixelRoutine::interpolateCentroid(const Float4 &x, const Float4 &y, const Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
 {
-	Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
+	Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, C), 16);
 
 	if(!flat)
 	{
-		interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
-		               y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
+		interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16) +
+		               y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, B), 16);
 
 		if(perspective)
 		{
@@ -300,7 +300,7 @@
 
 	if(q > 0)
 	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
 	}
 
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
@@ -310,20 +310,20 @@
 
 	if(state.frontStencil.compareMask != 0xff)
 	{
-		value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
+		value &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].testMaskQ));
 	}
 
 	stencilTest(value, state.frontStencil.compareOp, false);
 
 	if(state.backStencil.compareMask != 0xff)
 	{
-		valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
+		valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].testMaskQ));
 	}
 
 	stencilTest(valueBack, state.backStencil.compareOp, true);
 
-	value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
-	valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
+	value &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
+	valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
 	value |= valueBack;
 
 	sMask = SignMask(value) & cMask;
@@ -335,43 +335,43 @@
 
 	switch(stencilCompareMode)
 	{
-	case VK_COMPARE_OP_ALWAYS:
-		value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-		break;
-	case VK_COMPARE_OP_NEVER:
-		value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-		break;
-	case VK_COMPARE_OP_LESS:			// a < b ~ b > a
-		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-		value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
-		break;
-	case VK_COMPARE_OP_EQUAL:
-		value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
-		break;
-	case VK_COMPARE_OP_NOT_EQUAL:		// a != b ~ !(a == b)
-		value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
-		value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-		break;
-	case VK_COMPARE_OP_LESS_OR_EQUAL:	// a <= b ~ (b > a) || (a == b)
-		equal = value;
-		equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
-		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-		value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
-		value |= equal;
-		break;
-	case VK_COMPARE_OP_GREATER:		// a > b
-		equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ));
-		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-		equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
-		value = equal;
-		break;
-	case VK_COMPARE_OP_GREATER_OR_EQUAL:	// a >= b ~ !(a < b) ~ !(b > a)
-		value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-		value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
-		value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-		break;
-	default:
-		UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode));
+		case VK_COMPARE_OP_ALWAYS:
+			value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+			break;
+		case VK_COMPARE_OP_NEVER:
+			value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+			break;
+		case VK_COMPARE_OP_LESS:  // a < b ~ b > a
+			value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+			value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
+			break;
+		case VK_COMPARE_OP_EQUAL:
+			value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
+			break;
+		case VK_COMPARE_OP_NOT_EQUAL:  // a != b ~ !(a == b)
+			value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
+			value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+			break;
+		case VK_COMPARE_OP_LESS_OR_EQUAL:  // a <= b ~ (b > a) || (a == b)
+			equal = value;
+			equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
+			value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+			value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
+			value |= equal;
+			break;
+		case VK_COMPARE_OP_GREATER:  // a > b
+			equal = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ));
+			value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+			equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
+			value = equal;
+			break;
+		case VK_COMPARE_OP_GREATER_OR_EQUAL:  // a >= b ~ !(a < b) ~ !(b > a)
+			value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+			value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
+			value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+			break;
+		default:
+			UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode));
 	}
 }
 
@@ -385,11 +385,11 @@
 	}
 
 	Pointer<Byte> buffer = zBuffer + 4 * x;
-	Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
 	if(q > 0)
 	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
 	}
 
 	Float4 zValue;
@@ -405,45 +405,45 @@
 
 	switch(state.depthCompareMode)
 	{
-	case VK_COMPARE_OP_ALWAYS:
-		// Optimized
-		break;
-	case VK_COMPARE_OP_NEVER:
-		// Optimized
-		break;
-	case VK_COMPARE_OP_EQUAL:
-		zTest = CmpEQ(zValue, Z);
-		break;
-	case VK_COMPARE_OP_NOT_EQUAL:
-		zTest = CmpNEQ(zValue, Z);
-		break;
-	case VK_COMPARE_OP_LESS:
-		zTest = CmpNLE(zValue, Z);
-		break;
-	case VK_COMPARE_OP_GREATER_OR_EQUAL:
-		zTest = CmpLE(zValue, Z);
-		break;
-	case VK_COMPARE_OP_LESS_OR_EQUAL:
-		zTest = CmpNLT(zValue, Z);
-		break;
-	case VK_COMPARE_OP_GREATER:
-		zTest = CmpLT(zValue, Z);
-		break;
-	default:
-		UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
+		case VK_COMPARE_OP_ALWAYS:
+			// Optimized
+			break;
+		case VK_COMPARE_OP_NEVER:
+			// Optimized
+			break;
+		case VK_COMPARE_OP_EQUAL:
+			zTest = CmpEQ(zValue, Z);
+			break;
+		case VK_COMPARE_OP_NOT_EQUAL:
+			zTest = CmpNEQ(zValue, Z);
+			break;
+		case VK_COMPARE_OP_LESS:
+			zTest = CmpNLE(zValue, Z);
+			break;
+		case VK_COMPARE_OP_GREATER_OR_EQUAL:
+			zTest = CmpLE(zValue, Z);
+			break;
+		case VK_COMPARE_OP_LESS_OR_EQUAL:
+			zTest = CmpNLT(zValue, Z);
+			break;
+		case VK_COMPARE_OP_GREATER:
+			zTest = CmpLT(zValue, Z);
+			break;
+		default:
+			UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
 	}
 
 	switch(state.depthCompareMode)
 	{
-	case VK_COMPARE_OP_ALWAYS:
-		zMask = cMask;
-		break;
-	case VK_COMPARE_OP_NEVER:
-		zMask = 0x0;
-		break;
-	default:
-		zMask = SignMask(zTest) & cMask;
-		break;
+		case VK_COMPARE_OP_ALWAYS:
+			zMask = cMask;
+			break;
+		case VK_COMPARE_OP_NEVER:
+			zMask = 0x0;
+			break;
+		default:
+			zMask = SignMask(zTest) & cMask;
+			break;
 	}
 
 	if(state.stencilActive)
@@ -468,7 +468,7 @@
 
 	if(q > 0)
 	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
 	}
 
 	Short4 zValue;
@@ -488,45 +488,45 @@
 
 	switch(state.depthCompareMode)
 	{
-	case VK_COMPARE_OP_ALWAYS:
-		// Optimized
-		break;
-	case VK_COMPARE_OP_NEVER:
-		// Optimized
-		break;
-	case VK_COMPARE_OP_EQUAL:
-		zTest = Int4(CmpEQ(zValue, Z));
-		break;
-	case VK_COMPARE_OP_NOT_EQUAL:
-		zTest = ~Int4(CmpEQ(zValue, Z));
-		break;
-	case VK_COMPARE_OP_LESS:
-		zTest = Int4(CmpGT(zValue, Z));
-		break;
-	case VK_COMPARE_OP_GREATER_OR_EQUAL:
-		zTest = ~Int4(CmpGT(zValue, Z));
-		break;
-	case VK_COMPARE_OP_LESS_OR_EQUAL:
-		zTest = ~Int4(CmpGT(Z, zValue));
-		break;
-	case VK_COMPARE_OP_GREATER:
-		zTest = Int4(CmpGT(Z, zValue));
-		break;
-	default:
-		UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
+		case VK_COMPARE_OP_ALWAYS:
+			// Optimized
+			break;
+		case VK_COMPARE_OP_NEVER:
+			// Optimized
+			break;
+		case VK_COMPARE_OP_EQUAL:
+			zTest = Int4(CmpEQ(zValue, Z));
+			break;
+		case VK_COMPARE_OP_NOT_EQUAL:
+			zTest = ~Int4(CmpEQ(zValue, Z));
+			break;
+		case VK_COMPARE_OP_LESS:
+			zTest = Int4(CmpGT(zValue, Z));
+			break;
+		case VK_COMPARE_OP_GREATER_OR_EQUAL:
+			zTest = ~Int4(CmpGT(zValue, Z));
+			break;
+		case VK_COMPARE_OP_LESS_OR_EQUAL:
+			zTest = ~Int4(CmpGT(Z, zValue));
+			break;
+		case VK_COMPARE_OP_GREATER:
+			zTest = Int4(CmpGT(Z, zValue));
+			break;
+		default:
+			UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
 	}
 
 	switch(state.depthCompareMode)
 	{
-	case VK_COMPARE_OP_ALWAYS:
-		zMask = cMask;
-		break;
-	case VK_COMPARE_OP_NEVER:
-		zMask = 0x0;
-		break;
-	default:
-		zMask = SignMask(zTest) & cMask;
-		break;
+		case VK_COMPARE_OP_ALWAYS:
+			zMask = cMask;
+			break;
+		case VK_COMPARE_OP_NEVER:
+			zMask = 0x0;
+			break;
+		default:
+			zMask = SignMask(zTest) & cMask;
+			break;
 	}
 
 	if(state.stencilActive)
@@ -552,10 +552,10 @@
 
 void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha)
 {
-	Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
-	Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
-	Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
-	Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
+	Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c0)));
+	Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c1)));
+	Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c2)));
+	Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c3)));
 
 	Int aMask0 = SignMask(coverage0);
 	Int aMask1 = SignMask(coverage1);
@@ -578,11 +578,11 @@
 	}
 
 	Pointer<Byte> buffer = zBuffer + 4 * x;
-	Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
 	if(q > 0)
 	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
 	}
 
 	Float4 zValue;
@@ -594,8 +594,8 @@
 		zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
 	}
 
-	Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
-	zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
+	Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16));
+	zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16));
 	Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
 
 	// FIXME: Properly optimizes?
@@ -613,11 +613,11 @@
 	}
 
 	Pointer<Byte> buffer = zBuffer + 2 * x;
-	Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
 	if(q > 0)
 	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
 	}
 
 	Short4 zValue;
@@ -629,15 +629,15 @@
 		zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
 	}
 
-	Z = Z & *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q) + zMask * 8, 8);
-	zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q) + zMask * 8, 8);
+	Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8);
+	zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8);
 	Z = Z | zValue;
 
 	// FIXME: Properly optimizes?
 	*Pointer<Short>(buffer) = Extract(Z, 0);
-	*Pointer<Short>(buffer+2) = Extract(Z, 1);
-	*Pointer<Short>(buffer+pitch) = Extract(Z, 2);
-	*Pointer<Short>(buffer+pitch+2) = Extract(Z, 3);
+	*Pointer<Short>(buffer + 2) = Extract(Z, 1);
+	*Pointer<Short>(buffer + pitch) = Extract(Z, 2);
+	*Pointer<Short>(buffer + pitch + 2) = Extract(Z, 3);
 }
 
 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
@@ -677,7 +677,7 @@
 
 	if(q > 0)
 	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
 	}
 
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
@@ -686,11 +686,11 @@
 	Byte8 newValue;
 	stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask);
 
-	if((state.frontStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
+	if((state.frontStencil.writeMask & 0xFF) != 0xFF)  // Assume 8-bit stencil buffer
 	{
 		Byte8 maskedValue = bufferValue;
-		newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
-		maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
+		newValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].writeMaskQ));
+		maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].invWriteMaskQ));
 		newValue |= maskedValue;
 	}
 
@@ -698,20 +698,20 @@
 
 	stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask, sMask);
 
-	if((state.backStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
+	if((state.backStencil.writeMask & 0xFF) != 0xFF)  // Assume 8-bit stencil buffer
 	{
 		Byte8 maskedValue = bufferValue;
-		newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
-		maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
+		newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].writeMaskQ));
+		maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].invWriteMaskQ));
 		newValueBack |= maskedValue;
 	}
 
-	newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
-	newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
+	newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
+	newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
 	newValue |= newValueBack;
 
-	newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
-	bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
+	newValue &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * cMask);
+	bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * cMask);
 	newValue |= bufferValue;
 
 	*Pointer<Short>(buffer) = Extract(As<Short4>(newValue), 0);
@@ -738,15 +738,15 @@
 
 	if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
 	{
-		if(state.depthTestActive && ops.depthFailOp != ops.passOp)   // zMask valid and values not the same
+		if(state.depthTestActive && ops.depthFailOp != ops.passOp)  // zMask valid and values not the same
 		{
-			pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
-			zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
+			pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * zMask);
+			zFail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * zMask);
 			pass |= zFail;
 		}
 
-		pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
-		fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
+		pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * sMask);
+		fail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * sMask);
 		pass |= fail;
 	}
 }
@@ -755,32 +755,32 @@
 {
 	switch(operation)
 	{
-	case VK_STENCIL_OP_KEEP:
-		output = bufferValue;
-		break;
-	case VK_STENCIL_OP_ZERO:
-		output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-		break;
-	case VK_STENCIL_OP_REPLACE:
-		output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceQ));
-		break;
-	case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
-		output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
-		break;
-	case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
-		output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
-		break;
-	case VK_STENCIL_OP_INVERT:
-		output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-		break;
-	case VK_STENCIL_OP_INCREMENT_AND_WRAP:
-		output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
-		break;
-	case VK_STENCIL_OP_DECREMENT_AND_WRAP:
-		output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
-		break;
-	default:
-		UNIMPLEMENTED("VkStencilOp: %d", int(operation));
+		case VK_STENCIL_OP_KEEP:
+			output = bufferValue;
+			break;
+		case VK_STENCIL_OP_ZERO:
+			output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+			break;
+		case VK_STENCIL_OP_REPLACE:
+			output = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
+			break;
+		case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+			output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
+			break;
+		case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+			output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
+			break;
+		case VK_STENCIL_OP_INVERT:
+			output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+			break;
+		case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+			output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
+			break;
+		case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+			output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
+			break;
+		default:
+			UNIMPLEMENTED("VkStencilOp: %d", int(operation));
 	}
 }
 
@@ -788,80 +788,80 @@
 {
 	switch(blendFactorActive)
 	{
-	case VK_BLEND_FACTOR_ZERO:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_ONE:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_SRC_COLOR:
-		blendFactor.x = current.x;
-		blendFactor.y = current.y;
-		blendFactor.z = current.z;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-		blendFactor.x = Short4(0xFFFFu) - current.x;
-		blendFactor.y = Short4(0xFFFFu) - current.y;
-		blendFactor.z = Short4(0xFFFFu) - current.z;
-		break;
-	case VK_BLEND_FACTOR_DST_COLOR:
-		blendFactor.x = pixel.x;
-		blendFactor.y = pixel.y;
-		blendFactor.z = pixel.z;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-		blendFactor.x = Short4(0xFFFFu) - pixel.x;
-		blendFactor.y = Short4(0xFFFFu) - pixel.y;
-		blendFactor.z = Short4(0xFFFFu) - pixel.z;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA:
-		blendFactor.x = current.w;
-		blendFactor.y = current.w;
-		blendFactor.z = current.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-		blendFactor.x = Short4(0xFFFFu) - current.w;
-		blendFactor.y = Short4(0xFFFFu) - current.w;
-		blendFactor.z = Short4(0xFFFFu) - current.w;
-		break;
-	case VK_BLEND_FACTOR_DST_ALPHA:
-		blendFactor.x = pixel.w;
-		blendFactor.y = pixel.w;
-		blendFactor.z = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-		blendFactor.x = Short4(0xFFFFu) - pixel.w;
-		blendFactor.y = Short4(0xFFFFu) - pixel.w;
-		blendFactor.z = Short4(0xFFFFu) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-		blendFactor.x = Short4(0xFFFFu) - pixel.w;
-		blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
-		blendFactor.y = blendFactor.x;
-		blendFactor.z = blendFactor.x;
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_COLOR:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
+		case VK_BLEND_FACTOR_ZERO:
+			// Optimized
+			break;
+		case VK_BLEND_FACTOR_ONE:
+			// Optimized
+			break;
+		case VK_BLEND_FACTOR_SRC_COLOR:
+			blendFactor.x = current.x;
+			blendFactor.y = current.y;
+			blendFactor.z = current.z;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+			blendFactor.x = Short4(0xFFFFu) - current.x;
+			blendFactor.y = Short4(0xFFFFu) - current.y;
+			blendFactor.z = Short4(0xFFFFu) - current.z;
+			break;
+		case VK_BLEND_FACTOR_DST_COLOR:
+			blendFactor.x = pixel.x;
+			blendFactor.y = pixel.y;
+			blendFactor.z = pixel.z;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+			blendFactor.x = Short4(0xFFFFu) - pixel.x;
+			blendFactor.y = Short4(0xFFFFu) - pixel.y;
+			blendFactor.z = Short4(0xFFFFu) - pixel.z;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA:
+			blendFactor.x = current.w;
+			blendFactor.y = current.w;
+			blendFactor.z = current.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+			blendFactor.x = Short4(0xFFFFu) - current.w;
+			blendFactor.y = Short4(0xFFFFu) - current.w;
+			blendFactor.z = Short4(0xFFFFu) - current.w;
+			break;
+		case VK_BLEND_FACTOR_DST_ALPHA:
+			blendFactor.x = pixel.w;
+			blendFactor.y = pixel.w;
+			blendFactor.z = pixel.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+			blendFactor.x = Short4(0xFFFFu) - pixel.w;
+			blendFactor.y = Short4(0xFFFFu) - pixel.w;
+			blendFactor.z = Short4(0xFFFFu) - pixel.w;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+			blendFactor.x = Short4(0xFFFFu) - pixel.w;
+			blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
+			blendFactor.y = blendFactor.x;
+			blendFactor.z = blendFactor.x;
+			break;
+		case VK_BLEND_FACTOR_CONSTANT_COLOR:
+			blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[0]));
+			blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[1]));
+			blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[2]));
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+			blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[0]));
+			blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[1]));
+			blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[2]));
+			break;
+		case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+			blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+			blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+			blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+			blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+			blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+			blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
 	}
 }
 
@@ -869,49 +869,49 @@
 {
 	switch(blendFactorAlphaActive)
 	{
-	case VK_BLEND_FACTOR_ZERO:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_ONE:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_SRC_COLOR:
-		blendFactor.w = current.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-		blendFactor.w = Short4(0xFFFFu) - current.w;
-		break;
-	case VK_BLEND_FACTOR_DST_COLOR:
-		blendFactor.w = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-		blendFactor.w = Short4(0xFFFFu) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA:
-		blendFactor.w = current.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-		blendFactor.w = Short4(0xFFFFu) - current.w;
-		break;
-	case VK_BLEND_FACTOR_DST_ALPHA:
-		blendFactor.w = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-		blendFactor.w = Short4(0xFFFFu) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-		blendFactor.w = Short4(0xFFFFu);
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_COLOR:
-	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-		blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-		blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
+		case VK_BLEND_FACTOR_ZERO:
+			// Optimized
+			break;
+		case VK_BLEND_FACTOR_ONE:
+			// Optimized
+			break;
+		case VK_BLEND_FACTOR_SRC_COLOR:
+			blendFactor.w = current.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+			blendFactor.w = Short4(0xFFFFu) - current.w;
+			break;
+		case VK_BLEND_FACTOR_DST_COLOR:
+			blendFactor.w = pixel.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+			blendFactor.w = Short4(0xFFFFu) - pixel.w;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA:
+			blendFactor.w = current.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+			blendFactor.w = Short4(0xFFFFu) - current.w;
+			break;
+		case VK_BLEND_FACTOR_DST_ALPHA:
+			blendFactor.w = pixel.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+			blendFactor.w = Short4(0xFFFFu) - pixel.w;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+			blendFactor.w = Short4(0xFFFFu);
+			break;
+		case VK_BLEND_FACTOR_CONSTANT_COLOR:
+		case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+			blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+			blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
 	}
 }
 
@@ -929,139 +929,140 @@
 
 	switch(state.targetFormat[index])
 	{
-	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-		buffer += 2 * x;
-		buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+			buffer += 2 * x;
+			buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
 
-		pixel.x = (c01 & Short4(0x7C00u)) << 1;
-		pixel.y = (c01 & Short4(0x03E0u)) << 6;
-		pixel.z = (c01 & Short4(0x001Fu)) << 11;
-		pixel.w = (c01 & Short4(0x8000u)) >> 15;
+			pixel.x = (c01 & Short4(0x7C00u)) << 1;
+			pixel.y = (c01 & Short4(0x03E0u)) << 6;
+			pixel.z = (c01 & Short4(0x001Fu)) << 11;
+			pixel.w = (c01 & Short4(0x8000u)) >> 15;
 
-		// Expand to 16 bit range
-		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
-		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
-		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
-		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
-		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
-		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
-		break;
-	case VK_FORMAT_R5G6B5_UNORM_PACK16:
-		buffer += 2 * x;
-		buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+			// Expand to 16 bit range
+			pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+			pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+			pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
+			pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
+			pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+			pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+			break;
+		case VK_FORMAT_R5G6B5_UNORM_PACK16:
+			buffer += 2 * x;
+			buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
 
-		pixel.x = c01 & Short4(0xF800u);
-		pixel.y = (c01 & Short4(0x07E0u)) << 5;
-		pixel.z = (c01 & Short4(0x001Fu)) << 11;
-		pixel.w = Short4(0xFFFFu);
+			pixel.x = c01 & Short4(0xF800u);
+			pixel.y = (c01 & Short4(0x07E0u)) << 5;
+			pixel.z = (c01 & Short4(0x001Fu)) << 11;
+			pixel.w = Short4(0xFFFFu);
 
-		// Expand to 16 bit range
-		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
-		pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
-		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
-		pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
-		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
-		pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
-		break;
-	case VK_FORMAT_B8G8R8A8_UNORM:
-	case VK_FORMAT_B8G8R8A8_SRGB:
-		buffer += 4 * x;
-		c01 = *Pointer<Short4>(buffer);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		c23 = *Pointer<Short4>(buffer);
-		pixel.z = c01;
-		pixel.y = c01;
-		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
-		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
-		pixel.x = pixel.z;
-		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
-		pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
-		pixel.y = pixel.z;
-		pixel.w = pixel.x;
-		pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
-		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
-		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
-		pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
-		break;
-	case VK_FORMAT_R8G8B8A8_UNORM:
-	case VK_FORMAT_R8G8B8A8_SRGB:
-		buffer += 4 * x;
-		c01 = *Pointer<Short4>(buffer);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		c23 = *Pointer<Short4>(buffer);
-		pixel.z = c01;
-		pixel.y = c01;
-		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
-		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
-		pixel.x = pixel.z;
-		pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
-		pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
-		pixel.y = pixel.z;
-		pixel.w = pixel.x;
-		pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
-		pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
-		pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
-		pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
-		break;
-	case VK_FORMAT_R8_UNORM:
-		buffer += 1 * x;
-		pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
-		pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
-		pixel.y = Short4(0x0000);
-		pixel.z = Short4(0x0000);
-		pixel.w = Short4(0xFFFFu);
-		break;
-	case VK_FORMAT_R8G8_UNORM:
-		buffer += 2 * x;
-		c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
-		pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
-		pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
-		pixel.z = Short4(0x0000u);
-		pixel.w = Short4(0xFFFFu);
-		break;
-	case VK_FORMAT_R16G16B16A16_UNORM:
-		pixel.x = *Pointer<Short4>(buffer + 8 * x);
-		pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		pixel.z = *Pointer<Short4>(buffer + 8 * x);
-		pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
-		transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
-		break;
-	case VK_FORMAT_R16G16_UNORM:
-		pixel.x = *Pointer<Short4>(buffer + 4 * x);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		pixel.y = *Pointer<Short4>(buffer + 4 * x);
-		pixel.z = pixel.x;
-		pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
-		pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
-		pixel.y = pixel.z;
-		pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
-		pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
-		pixel.z = Short4(0xFFFFu);
-		pixel.w = Short4(0xFFFFu);
-		break;
-	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-	{
-		Int4 v = Int4(0);
-		v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
-		v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-		v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
-		v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
+			// Expand to 16 bit range
+			pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+			pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+			pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
+			pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
+			pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+			pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+			break;
+		case VK_FORMAT_B8G8R8A8_UNORM:
+		case VK_FORMAT_B8G8R8A8_SRGB:
+			buffer += 4 * x;
+			c01 = *Pointer<Short4>(buffer);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			c23 = *Pointer<Short4>(buffer);
+			pixel.z = c01;
+			pixel.y = c01;
+			pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
+			pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
+			pixel.x = pixel.z;
+			pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
+			pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
+			pixel.y = pixel.z;
+			pixel.w = pixel.x;
+			pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
+			pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
+			pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
+			pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
+			break;
+		case VK_FORMAT_R8G8B8A8_UNORM:
+		case VK_FORMAT_R8G8B8A8_SRGB:
+			buffer += 4 * x;
+			c01 = *Pointer<Short4>(buffer);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			c23 = *Pointer<Short4>(buffer);
+			pixel.z = c01;
+			pixel.y = c01;
+			pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
+			pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
+			pixel.x = pixel.z;
+			pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
+			pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
+			pixel.y = pixel.z;
+			pixel.w = pixel.x;
+			pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
+			pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
+			pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
+			pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
+			break;
+		case VK_FORMAT_R8_UNORM:
+			buffer += 1 * x;
+			pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
+			pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
+			pixel.y = Short4(0x0000);
+			pixel.z = Short4(0x0000);
+			pixel.w = Short4(0xFFFFu);
+			break;
+		case VK_FORMAT_R8G8_UNORM:
+			buffer += 2 * x;
+			c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
+			pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
+			pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
+			pixel.z = Short4(0x0000u);
+			pixel.w = Short4(0xFFFFu);
+			break;
+		case VK_FORMAT_R16G16B16A16_UNORM:
+			pixel.x = *Pointer<Short4>(buffer + 8 * x);
+			pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.z = *Pointer<Short4>(buffer + 8 * x);
+			pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
+			transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+			break;
+		case VK_FORMAT_R16G16_UNORM:
+			pixel.x = *Pointer<Short4>(buffer + 4 * x);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.y = *Pointer<Short4>(buffer + 4 * x);
+			pixel.z = pixel.x;
+			pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
+			pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
+			pixel.y = pixel.z;
+			pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
+			pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
+			pixel.z = Short4(0xFFFFu);
+			pixel.w = Short4(0xFFFFu);
+			break;
+		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+		{
+			Int4 v = Int4(0);
+			v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
+			v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
+			v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
 
-		pixel.x = Short4(v << 6) & Short4(0xFFC0u);
-		pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
-		pixel.z = Short4(v >> 14) & Short4(0xFFC0u);
-		pixel.w = Short4(v >> 16) & Short4(0xC000u);
-	} break;
-	default:
-		UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]);
+			pixel.x = Short4(v << 6) & Short4(0xFFC0u);
+			pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
+			pixel.z = Short4(v >> 14) & Short4(0xFFC0u);
+			pixel.w = Short4(v >> 16) & Short4(0xC000u);
+		}
+		break;
+		default:
+			UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]);
 	}
 
 	if(isSRGB(index))
@@ -1103,46 +1104,46 @@
 
 	switch(state.blendState[index].blendOperation)
 	{
-	case VK_BLEND_OP_ADD:
-		current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_SUBTRACT:
-		current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_REVERSE_SUBTRACT:
-		current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
-		current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
-		current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
-		break;
-	case VK_BLEND_OP_MIN:
-		current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_MAX:
-		current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_SRC_EXT:
-		// No operation
-		break;
-	case VK_BLEND_OP_DST_EXT:
-		current.x = pixel.x;
-		current.y = pixel.y;
-		current.z = pixel.z;
-		break;
-	case VK_BLEND_OP_ZERO_EXT:
-		current.x = Short4(0x0000);
-		current.y = Short4(0x0000);
-		current.z = Short4(0x0000);
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
+		case VK_BLEND_OP_ADD:
+			current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
+			current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
+			current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
+			break;
+		case VK_BLEND_OP_SUBTRACT:
+			current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
+			current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
+			current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
+			break;
+		case VK_BLEND_OP_REVERSE_SUBTRACT:
+			current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
+			current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
+			current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
+			break;
+		case VK_BLEND_OP_MIN:
+			current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
+			current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
+			current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
+			break;
+		case VK_BLEND_OP_MAX:
+			current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
+			current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
+			current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
+			break;
+		case VK_BLEND_OP_SRC_EXT:
+			// No operation
+			break;
+		case VK_BLEND_OP_DST_EXT:
+			current.x = pixel.x;
+			current.y = pixel.y;
+			current.z = pixel.z;
+			break;
+		case VK_BLEND_OP_ZERO_EXT:
+			current.x = Short4(0x0000);
+			current.y = Short4(0x0000);
+			current.z = Short4(0x0000);
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
 	}
 
 	blendFactorAlpha(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactorAlpha);
@@ -1160,32 +1161,32 @@
 
 	switch(state.blendState[index].blendOperationAlpha)
 	{
-	case VK_BLEND_OP_ADD:
-		current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_SUBTRACT:
-		current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_REVERSE_SUBTRACT:
-		current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
-		break;
-	case VK_BLEND_OP_MIN:
-		current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_MAX:
-		current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_SRC_EXT:
-		// No operation
-		break;
-	case VK_BLEND_OP_DST_EXT:
-		current.w = pixel.w;
-		break;
-	case VK_BLEND_OP_ZERO_EXT:
-		current.w = Short4(0x0000);
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
+		case VK_BLEND_OP_ADD:
+			current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
+			break;
+		case VK_BLEND_OP_SUBTRACT:
+			current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
+			break;
+		case VK_BLEND_OP_REVERSE_SUBTRACT:
+			current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
+			break;
+		case VK_BLEND_OP_MIN:
+			current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
+			break;
+		case VK_BLEND_OP_MAX:
+			current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
+			break;
+		case VK_BLEND_OP_SRC_EXT:
+			// No operation
+			break;
+		case VK_BLEND_OP_DST_EXT:
+			current.w = pixel.w;
+			break;
+		case VK_BLEND_OP_ZERO_EXT:
+			current.w = Short4(0x0000);
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
 	}
 }
 
@@ -1198,21 +1199,21 @@
 
 	switch(state.targetFormat[index])
 	{
-	case VK_FORMAT_B8G8R8A8_UNORM:
-	case VK_FORMAT_B8G8R8A8_SRGB:
-	case VK_FORMAT_R8G8B8A8_UNORM:
-	case VK_FORMAT_R8G8B8A8_SRGB:
-	case VK_FORMAT_R8G8_UNORM:
-	case VK_FORMAT_R8_UNORM:
-	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
-		current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
-		current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
-		current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
-		current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
-		break;
-	default:
-		break;
+		case VK_FORMAT_B8G8R8A8_UNORM:
+		case VK_FORMAT_B8G8R8A8_SRGB:
+		case VK_FORMAT_R8G8B8A8_UNORM:
+		case VK_FORMAT_R8G8B8A8_SRGB:
+		case VK_FORMAT_R8G8_UNORM:
+		case VK_FORMAT_R8_UNORM:
+		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
+			current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
+			current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
+			current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
+			break;
+		default:
+			break;
 	}
 
 	int rgbaWriteMask = state.colorWriteActive(index);
@@ -1220,7 +1221,7 @@
 
 	switch(state.targetFormat[index])
 	{
-	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
 		{
 			current.w = current.w & Short4(0x8000u);
 			current.x = As<UShort4>(current.x & Short4(0xF800)) >> 1;
@@ -1230,7 +1231,7 @@
 			current.x = current.x | current.y | current.z | current.w;
 		}
 		break;
-	case VK_FORMAT_R5G6B5_UNORM_PACK16:
+		case VK_FORMAT_R5G6B5_UNORM_PACK16:
 		{
 			current.x = current.x & Short4(0xF800u);
 			current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
@@ -1239,121 +1240,121 @@
 			current.x = current.x | current.y | current.z;
 		}
 		break;
-	case VK_FORMAT_B8G8R8A8_UNORM:
-	case VK_FORMAT_B8G8R8A8_SRGB:
-		if(rgbaWriteMask == 0x7)
-		{
+		case VK_FORMAT_B8G8R8A8_UNORM:
+		case VK_FORMAT_B8G8R8A8_SRGB:
+			if(rgbaWriteMask == 0x7)
+			{
+				current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+				current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+				current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+
+				current.z = As<Short4>(PackUnsigned(current.z, current.x));
+				current.y = As<Short4>(PackUnsigned(current.y, current.y));
+
+				current.x = current.z;
+				current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+				current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+				current.y = current.z;
+				current.z = As<Short4>(UnpackLow(current.z, current.x));
+				current.y = As<Short4>(UnpackHigh(current.y, current.x));
+			}
+			else
+			{
+				current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+				current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+				current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+				current.w = As<Short4>(As<UShort4>(current.w) >> 8);
+
+				current.z = As<Short4>(PackUnsigned(current.z, current.x));
+				current.y = As<Short4>(PackUnsigned(current.y, current.w));
+
+				current.x = current.z;
+				current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+				current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+				current.y = current.z;
+				current.z = As<Short4>(UnpackLow(current.z, current.x));
+				current.y = As<Short4>(UnpackHigh(current.y, current.x));
+			}
+			break;
+		case VK_FORMAT_R8G8B8A8_UNORM:
+		case VK_FORMAT_R8G8B8A8_SRGB:
+		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			if(rgbaWriteMask == 0x7)
+			{
+				current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+				current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+				current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+
+				current.z = As<Short4>(PackUnsigned(current.x, current.z));
+				current.y = As<Short4>(PackUnsigned(current.y, current.y));
+
+				current.x = current.z;
+				current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+				current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+				current.y = current.z;
+				current.z = As<Short4>(UnpackLow(current.z, current.x));
+				current.y = As<Short4>(UnpackHigh(current.y, current.x));
+			}
+			else
+			{
+				current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+				current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+				current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+				current.w = As<Short4>(As<UShort4>(current.w) >> 8);
+
+				current.z = As<Short4>(PackUnsigned(current.x, current.z));
+				current.y = As<Short4>(PackUnsigned(current.y, current.w));
+
+				current.x = current.z;
+				current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+				current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+				current.y = current.z;
+				current.z = As<Short4>(UnpackLow(current.z, current.x));
+				current.y = As<Short4>(UnpackHigh(current.y, current.x));
+			}
+			break;
+		case VK_FORMAT_R8G8_UNORM:
 			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
 			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
-			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
-
-			current.z = As<Short4>(PackUnsigned(current.z, current.x));
+			current.x = As<Short4>(PackUnsigned(current.x, current.x));
 			current.y = As<Short4>(PackUnsigned(current.y, current.y));
-
-			current.x = current.z;
-			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
-			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
-			current.y = current.z;
-			current.z = As<Short4>(UnpackLow(current.z, current.x));
-			current.y = As<Short4>(UnpackHigh(current.y, current.x));
-		}
-		else
-		{
+			current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
+			break;
+		case VK_FORMAT_R8_UNORM:
 			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
-			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
-			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
-			current.w = As<Short4>(As<UShort4>(current.w) >> 8);
-
-			current.z = As<Short4>(PackUnsigned(current.z, current.x));
-			current.y = As<Short4>(PackUnsigned(current.y, current.w));
-
-			current.x = current.z;
-			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
-			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+			current.x = As<Short4>(PackUnsigned(current.x, current.x));
+			break;
+		case VK_FORMAT_R16G16_UNORM:
+			current.z = current.x;
+			current.x = As<Short4>(UnpackLow(current.x, current.y));
+			current.z = As<Short4>(UnpackHigh(current.z, current.y));
 			current.y = current.z;
-			current.z = As<Short4>(UnpackLow(current.z, current.x));
-			current.y = As<Short4>(UnpackHigh(current.y, current.x));
-		}
-		break;
-	case VK_FORMAT_R8G8B8A8_UNORM:
-	case VK_FORMAT_R8G8B8A8_SRGB:
-	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
-		if(rgbaWriteMask == 0x7)
+			break;
+		case VK_FORMAT_R16G16B16A16_UNORM:
+			transpose4x4(current.x, current.y, current.z, current.w);
+			break;
+		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
 		{
-			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
-			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
-			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
-
-			current.z = As<Short4>(PackUnsigned(current.x, current.z));
-			current.y = As<Short4>(PackUnsigned(current.y, current.y));
-
-			current.x = current.z;
-			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
-			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
-			current.y = current.z;
-			current.z = As<Short4>(UnpackLow(current.z, current.x));
-			current.y = As<Short4>(UnpackHigh(current.y, current.x));
+			auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
+			auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
+			auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
+			auto a = (Int4(current.w) >> 14) & Int4(0x3);
+			Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
+			auto c02 = As<Int2>(Int4(packed.xzzz));  // TODO: auto c02 = packed.xz;
+			auto c13 = As<Int2>(Int4(packed.ywww));  // TODO: auto c13 = packed.yw;
+			current.x = UnpackLow(c02, c13);
+			current.y = UnpackHigh(c02, c13);
+			break;
 		}
-		else
-		{
-			current.x = As<Short4>(As<UShort4>(current.x) >> 8);
-			current.y = As<Short4>(As<UShort4>(current.y) >> 8);
-			current.z = As<Short4>(As<UShort4>(current.z) >> 8);
-			current.w = As<Short4>(As<UShort4>(current.w) >> 8);
-
-			current.z = As<Short4>(PackUnsigned(current.x, current.z));
-			current.y = As<Short4>(PackUnsigned(current.y, current.w));
-
-			current.x = current.z;
-			current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
-			current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
-			current.y = current.z;
-			current.z = As<Short4>(UnpackLow(current.z, current.x));
-			current.y = As<Short4>(UnpackHigh(current.y, current.x));
-		}
-		break;
-	case VK_FORMAT_R8G8_UNORM:
-		current.x = As<Short4>(As<UShort4>(current.x) >> 8);
-		current.y = As<Short4>(As<UShort4>(current.y) >> 8);
-		current.x = As<Short4>(PackUnsigned(current.x, current.x));
-		current.y = As<Short4>(PackUnsigned(current.y, current.y));
-		current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
-		break;
-	case VK_FORMAT_R8_UNORM:
-		current.x = As<Short4>(As<UShort4>(current.x) >> 8);
-		current.x = As<Short4>(PackUnsigned(current.x, current.x));
-		break;
-	case VK_FORMAT_R16G16_UNORM:
-		current.z = current.x;
-		current.x = As<Short4>(UnpackLow(current.x, current.y));
-		current.z = As<Short4>(UnpackHigh(current.z, current.y));
-		current.y = current.z;
-		break;
-	case VK_FORMAT_R16G16B16A16_UNORM:
-		transpose4x4(current.x, current.y, current.z, current.w);
-		break;
-	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-	{
-		auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
-		auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
-		auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
-		auto a = (Int4(current.w) >> 14) & Int4(0x3);
-		Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
-		auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
-		auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
-		current.x = UnpackLow(c02, c13);
-		current.y = UnpackHigh(c02, c13);
-		break;
-	}
-	default:
-		UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+		default:
+			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 	}
 
 	Short4 c01 = current.z;
 	Short4 c23 = current.y;
 
-	Int xMask;   // Combination of all masks
+	Int xMask;  // Combination of all masks
 
 	if(state.depthTestActive)
 	{
@@ -1373,26 +1374,26 @@
 
 	switch(state.targetFormat[index])
 	{
-	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
 		{
 			buffer += 2 * x;
 			Int value = *Pointer<Int>(buffer);
 
-			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants,mask5551Q[bgraWriteMask & 0xF][0]));
+			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask5551Q[bgraWriteMask & 0xF][0]));
 
 			Int c01 = Extract(As<Int2>(current.x), 0);
-			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
+			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
 			if(bgraWriteMask != 0x0000000F)
 			{
 				mask01 &= channelMask;
 			}
 			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 			value = *Pointer<Int>(buffer);
 
 			Int c23 = Extract(As<Int2>(current.x), 1);
-			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
+			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
 			if(bgraWriteMask != 0x0000000F)
 			{
 				mask23 &= channelMask;
@@ -1400,26 +1401,26 @@
 			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
 		}
 		break;
-	case VK_FORMAT_R5G6B5_UNORM_PACK16:
+		case VK_FORMAT_R5G6B5_UNORM_PACK16:
 		{
 			buffer += 2 * x;
 			Int value = *Pointer<Int>(buffer);
 
-			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
+			Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask565Q[bgraWriteMask & 0x7][0]));
 
 			Int c01 = Extract(As<Int2>(current.x), 0);
-			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
+			Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
 			if((bgraWriteMask & 0x00000007) != 0x00000007)
 			{
 				mask01 &= channelMask;
 			}
 			*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 			value = *Pointer<Int>(buffer);
 
 			Int c23 = Extract(As<Int2>(current.x), 1);
-			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
+			Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
 			if((bgraWriteMask & 0x00000007) != 0x00000007)
 			{
 				mask23 &= channelMask;
@@ -1427,24 +1428,24 @@
 			*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
 		}
 		break;
-	case VK_FORMAT_B8G8R8A8_UNORM:
-	case VK_FORMAT_B8G8R8A8_SRGB:
+		case VK_FORMAT_B8G8R8A8_UNORM:
+		case VK_FORMAT_B8G8R8A8_SRGB:
 		{
 			buffer += x * 4;
 			Short4 value = *Pointer<Short4>(buffer);
-			Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
+			Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[bgraWriteMask][0]));
 
-			Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
+			Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
 			if(bgraWriteMask != 0x0000000F)
 			{
 				mask01 &= channelMask;
 			}
 			*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 			value = *Pointer<Short4>(buffer);
 
-			Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
+			Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
 			if(bgraWriteMask != 0x0000000F)
 			{
 				mask23 &= channelMask;
@@ -1452,26 +1453,26 @@
 			*Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
 		}
 		break;
-	case VK_FORMAT_R8G8B8A8_UNORM:
-	case VK_FORMAT_R8G8B8A8_SRGB:
-	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+		case VK_FORMAT_R8G8B8A8_UNORM:
+		case VK_FORMAT_R8G8B8A8_SRGB:
+		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
 		{
 			buffer += x * 4;
 			Short4 value = *Pointer<Short4>(buffer);
-			Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
+			Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
 
-			Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
+			Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
 			if(rgbaWriteMask != 0x0000000F)
 			{
 				mask01 &= channelMask;
 			}
 			*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 			value = *Pointer<Short4>(buffer);
 
-			Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
+			Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
 			if(rgbaWriteMask != 0x0000000F)
 			{
 				mask23 &= channelMask;
@@ -1479,49 +1480,49 @@
 			*Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
 		}
 		break;
-	case VK_FORMAT_R8G8_UNORM:
-		if((rgbaWriteMask & 0x00000003) != 0x0)
-		{
-			buffer += 2 * x;
-			Int2 value;
-			value = Insert(value, *Pointer<Int>(buffer), 0);
-			Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
-
-			Int2 packedCol = As<Int2>(current.x);
-
-			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
-			if((rgbaWriteMask & 0x3) != 0x3)
+		case VK_FORMAT_R8G8_UNORM:
+			if((rgbaWriteMask & 0x00000003) != 0x0)
 			{
-				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
-				UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
-				mergedMask &= rgbaMask;
+				buffer += 2 * x;
+				Int2 value;
+				value = Insert(value, *Pointer<Int>(buffer), 0);
+				Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
+
+				Int2 packedCol = As<Int2>(current.x);
+
+				UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
+				if((rgbaWriteMask & 0x3) != 0x3)
+				{
+					Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
+					UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+					mergedMask &= rgbaMask;
+				}
+
+				packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
+
+				*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
+				*Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
 			}
+			break;
+		case VK_FORMAT_R8_UNORM:
+			if(rgbaWriteMask & 0x00000001)
+			{
+				buffer += 1 * x;
+				Short4 value;
+				value = Insert(value, *Pointer<Short>(buffer), 0);
+				Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
 
-			packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
+				current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
+				value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
+				current.x |= value;
 
-			*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
-			*Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
-		}
-		break;
-	case VK_FORMAT_R8_UNORM:
-		if(rgbaWriteMask & 0x00000001)
-		{
-			buffer += 1 * x;
-			Short4 value;
-			value = Insert(value, *Pointer<Short>(buffer), 0);
-			Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
-
-			current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
-			value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
-			current.x |= value;
-
-			*Pointer<Short>(buffer) = Extract(current.x, 0);
-			*Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
-		}
-		break;
-	case VK_FORMAT_R16G16_UNORM:
+				*Pointer<Short>(buffer) = Extract(current.x, 0);
+				*Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
+			}
+			break;
+		case VK_FORMAT_R16G16_UNORM:
 		{
 			buffer += 4 * x;
 
@@ -1530,35 +1531,35 @@
 			if((rgbaWriteMask & 0x00000003) != 0x00000003)
 			{
 				Short4 masked = value;
-				current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
-				masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
+				current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0]));
+				masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0]));
 				current.x |= masked;
 			}
 
-			current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
-			value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
+			current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+			value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD01Q) + xMask * 8);
 			current.x |= value;
 			*Pointer<Short4>(buffer) = current.x;
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
 			value = *Pointer<Short4>(buffer);
 
 			if((rgbaWriteMask & 0x00000003) != 0x00000003)
 			{
 				Short4 masked = value;
-				current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
-				masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
+				current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0]));
+				masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0]));
 				current.y |= masked;
 			}
 
-			current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
-			value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
+			current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+			value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD23Q) + xMask * 8);
 			current.y |= value;
 			*Pointer<Short4>(buffer) = current.y;
 		}
 		break;
-	case VK_FORMAT_R16G16B16A16_UNORM:
+		case VK_FORMAT_R16G16B16A16_UNORM:
 		{
 			buffer += 8 * x;
 
@@ -1568,13 +1569,13 @@
 				if(rgbaWriteMask != 0x0000000F)
 				{
 					Short4 masked = value;
-					current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
-					masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+					current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+					masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
 					current.x |= masked;
 				}
 
-				current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
-				value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
+				current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ0Q) + xMask * 8);
+				value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ0Q) + xMask * 8);
 				current.x |= value;
 				*Pointer<Short4>(buffer) = current.x;
 			}
@@ -1585,18 +1586,18 @@
 				if(rgbaWriteMask != 0x0000000F)
 				{
 					Short4 masked = value;
-					current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
-					masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+					current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+					masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
 					current.y |= masked;
 				}
 
-				current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
-				value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
+				current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ1Q) + xMask * 8);
+				value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ1Q) + xMask * 8);
 				current.y |= value;
 				*Pointer<Short4>(buffer + 8) = current.y;
 			}
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
 			{
 				Short4 value = *Pointer<Short4>(buffer);
@@ -1604,13 +1605,13 @@
 				if(rgbaWriteMask != 0x0000000F)
 				{
 					Short4 masked = value;
-					current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
-					masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+					current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+					masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
 					current.z |= masked;
 				}
 
-				current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
-				value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
+				current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ2Q) + xMask * 8);
+				value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ2Q) + xMask * 8);
 				current.z |= value;
 				*Pointer<Short4>(buffer) = current.z;
 			}
@@ -1621,13 +1622,13 @@
 				if(rgbaWriteMask != 0x0000000F)
 				{
 					Short4 masked = value;
-					current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
-					masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+					current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+					masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
 					current.w |= masked;
 				}
 
-				current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
-				value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
+				current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ3Q) + xMask * 8);
+				value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ3Q) + xMask * 8);
 				current.w |= value;
 				*Pointer<Short4>(buffer + 8) = current.w;
 			}
@@ -1656,8 +1657,8 @@
 			*Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
 		}
 		break;
-	default:
-		UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+		default:
+			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 	}
 }
 
@@ -1665,85 +1666,85 @@
 {
 	switch(blendFactorActive)
 	{
-	case VK_BLEND_FACTOR_ZERO:
-		blendFactor.x = Float4(0);
-		blendFactor.y = Float4(0);
-		blendFactor.z = Float4(0);
-		break;
-	case VK_BLEND_FACTOR_ONE:
-		blendFactor.x = Float4(1);
-		blendFactor.y = Float4(1);
-		blendFactor.z = Float4(1);
-		break;
-	case VK_BLEND_FACTOR_SRC_COLOR:
-		blendFactor.x = oC.x;
-		blendFactor.y = oC.y;
-		blendFactor.z = oC.z;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-		blendFactor.x = Float4(1.0f) - oC.x;
-		blendFactor.y = Float4(1.0f) - oC.y;
-		blendFactor.z = Float4(1.0f) - oC.z;
-		break;
-	case VK_BLEND_FACTOR_DST_COLOR:
-		blendFactor.x = pixel.x;
-		blendFactor.y = pixel.y;
-		blendFactor.z = pixel.z;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-		blendFactor.x = Float4(1.0f) - pixel.x;
-		blendFactor.y = Float4(1.0f) - pixel.y;
-		blendFactor.z = Float4(1.0f) - pixel.z;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA:
-		blendFactor.x = oC.w;
-		blendFactor.y = oC.w;
-		blendFactor.z = oC.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-		blendFactor.x = Float4(1.0f) - oC.w;
-		blendFactor.y = Float4(1.0f) - oC.w;
-		blendFactor.z = Float4(1.0f) - oC.w;
-		break;
-	case VK_BLEND_FACTOR_DST_ALPHA:
-		blendFactor.x = pixel.w;
-		blendFactor.y = pixel.w;
-		blendFactor.z = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-		blendFactor.x = Float4(1.0f) - pixel.w;
-		blendFactor.y = Float4(1.0f) - pixel.w;
-		blendFactor.z = Float4(1.0f) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-		blendFactor.x = Float4(1.0f) - pixel.w;
-		blendFactor.x = Min(blendFactor.x, oC.w);
-		blendFactor.y = blendFactor.x;
-		blendFactor.z = blendFactor.x;
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_COLOR:
-		blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
-		blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
-		blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-		blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
-		blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
-		blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-		blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
-		blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
-		blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-		blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
-		blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
-		blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
-		break;
+		case VK_BLEND_FACTOR_ZERO:
+			blendFactor.x = Float4(0);
+			blendFactor.y = Float4(0);
+			blendFactor.z = Float4(0);
+			break;
+		case VK_BLEND_FACTOR_ONE:
+			blendFactor.x = Float4(1);
+			blendFactor.y = Float4(1);
+			blendFactor.z = Float4(1);
+			break;
+		case VK_BLEND_FACTOR_SRC_COLOR:
+			blendFactor.x = oC.x;
+			blendFactor.y = oC.y;
+			blendFactor.z = oC.z;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+			blendFactor.x = Float4(1.0f) - oC.x;
+			blendFactor.y = Float4(1.0f) - oC.y;
+			blendFactor.z = Float4(1.0f) - oC.z;
+			break;
+		case VK_BLEND_FACTOR_DST_COLOR:
+			blendFactor.x = pixel.x;
+			blendFactor.y = pixel.y;
+			blendFactor.z = pixel.z;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+			blendFactor.x = Float4(1.0f) - pixel.x;
+			blendFactor.y = Float4(1.0f) - pixel.y;
+			blendFactor.z = Float4(1.0f) - pixel.z;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA:
+			blendFactor.x = oC.w;
+			blendFactor.y = oC.w;
+			blendFactor.z = oC.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+			blendFactor.x = Float4(1.0f) - oC.w;
+			blendFactor.y = Float4(1.0f) - oC.w;
+			blendFactor.z = Float4(1.0f) - oC.w;
+			break;
+		case VK_BLEND_FACTOR_DST_ALPHA:
+			blendFactor.x = pixel.w;
+			blendFactor.y = pixel.w;
+			blendFactor.z = pixel.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+			blendFactor.x = Float4(1.0f) - pixel.w;
+			blendFactor.y = Float4(1.0f) - pixel.w;
+			blendFactor.z = Float4(1.0f) - pixel.w;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+			blendFactor.x = Float4(1.0f) - pixel.w;
+			blendFactor.x = Min(blendFactor.x, oC.w);
+			blendFactor.y = blendFactor.x;
+			blendFactor.z = blendFactor.x;
+			break;
+		case VK_BLEND_FACTOR_CONSTANT_COLOR:
+			blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[0]));
+			blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[1]));
+			blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[2]));
+			break;
+		case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+			blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+			blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+			blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+			blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[0]));
+			blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[1]));
+			blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[2]));
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+			blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+			blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+			blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+			break;
 
-	default:
-		UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
+		default:
+			UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
 	}
 }
 
@@ -1751,49 +1752,49 @@
 {
 	switch(blendFactorAlphaActive)
 	{
-	case VK_BLEND_FACTOR_ZERO:
-		blendFactor.w = Float4(0);
-		break;
-	case VK_BLEND_FACTOR_ONE:
-		blendFactor.w = Float4(1);
-		break;
-	case VK_BLEND_FACTOR_SRC_COLOR:
-		blendFactor.w = oC.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-		blendFactor.w = Float4(1.0f) - oC.w;
-		break;
-	case VK_BLEND_FACTOR_DST_COLOR:
-		blendFactor.w = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-		blendFactor.w = Float4(1.0f) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA:
-		blendFactor.w = oC.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-		blendFactor.w = Float4(1.0f) - oC.w;
-		break;
-	case VK_BLEND_FACTOR_DST_ALPHA:
-		blendFactor.w = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-		blendFactor.w = Float4(1.0f) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-		blendFactor.w = Float4(1.0f);
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_COLOR:
-	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-		blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-		blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
+		case VK_BLEND_FACTOR_ZERO:
+			blendFactor.w = Float4(0);
+			break;
+		case VK_BLEND_FACTOR_ONE:
+			blendFactor.w = Float4(1);
+			break;
+		case VK_BLEND_FACTOR_SRC_COLOR:
+			blendFactor.w = oC.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+			blendFactor.w = Float4(1.0f) - oC.w;
+			break;
+		case VK_BLEND_FACTOR_DST_COLOR:
+			blendFactor.w = pixel.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+			blendFactor.w = Float4(1.0f) - pixel.w;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA:
+			blendFactor.w = oC.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+			blendFactor.w = Float4(1.0f) - oC.w;
+			break;
+		case VK_BLEND_FACTOR_DST_ALPHA:
+			blendFactor.w = pixel.w;
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+			blendFactor.w = Float4(1.0f) - pixel.w;
+			break;
+		case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+			blendFactor.w = Float4(1.0f);
+			break;
+		case VK_BLEND_FACTOR_CONSTANT_COLOR:
+		case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+			blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+			break;
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+		case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+			blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
 	}
 }
 
@@ -1829,89 +1830,89 @@
 
 	switch(state.targetFormat[index])
 	{
-	case VK_FORMAT_R32_SINT:
-	case VK_FORMAT_R32_UINT:
-	case VK_FORMAT_R32_SFLOAT:
-		// FIXME: movlps
-		pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
-		pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		// FIXME: movhps
-		pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
-		pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
-		pixel.y = pixel.z = pixel.w = one;
-		break;
-	case VK_FORMAT_R32G32_SINT:
-	case VK_FORMAT_R32G32_UINT:
-	case VK_FORMAT_R32G32_SFLOAT:
-		pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
-		pixel.z = pixel.x;
-		pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202);
-		pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313);
-		pixel.y = pixel.z;
-		pixel.z = pixel.w = one;
-		break;
-	case VK_FORMAT_R32G32B32A32_SFLOAT:
-	case VK_FORMAT_R32G32B32A32_SINT:
-	case VK_FORMAT_R32G32B32A32_UINT:
-		pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
-		pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
-		pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
-		transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
-		break;
-	case VK_FORMAT_R16_SFLOAT:
-		pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
-		pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
-		pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
-		pixel.y = pixel.z = pixel.w = one;
-		break;
-	case VK_FORMAT_R16G16_SFLOAT:
-		pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
-		pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
-		pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
-		pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
-		pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
-		pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
-		pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
-		pixel.z = pixel.w = one;
-		break;
-	case VK_FORMAT_R16G16B16A16_SFLOAT:
-		pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
-		pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
-		pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
-		pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
-		pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
-		pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
-		pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
-		pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
-		pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
-		pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
-		pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
-		pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
-		pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
-		pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
-		pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
-		break;
-	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
-		pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
-		pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-		pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
-		pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
-		transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
-		break;
-	default:
-		UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32_SFLOAT:
+			// FIXME: movlps
+			pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
+			pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			// FIXME: movhps
+			pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
+			pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
+			pixel.y = pixel.z = pixel.w = one;
+			break;
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32_UINT:
+		case VK_FORMAT_R32G32_SFLOAT:
+			pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
+			pixel.z = pixel.x;
+			pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202);
+			pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313);
+			pixel.y = pixel.z;
+			pixel.z = pixel.w = one;
+			break;
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+			pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
+			pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
+			pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
+			transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+			break;
+		case VK_FORMAT_R16_SFLOAT:
+			pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
+			pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
+			pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
+			pixel.y = pixel.z = pixel.w = one;
+			break;
+		case VK_FORMAT_R16G16_SFLOAT:
+			pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
+			pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
+			pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
+			pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
+			pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
+			pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
+			pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
+			pixel.z = pixel.w = one;
+			break;
+		case VK_FORMAT_R16G16B16A16_SFLOAT:
+			pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
+			pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
+			pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
+			pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
+			pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
+			pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
+			pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
+			pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
+			pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
+			pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
+			pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
+			pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
+			pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
+			pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
+			pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
+			break;
+		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+			pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
+			pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+			pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
+			pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
+			transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+			break;
+		default:
+			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 	}
 
 	// Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
@@ -1931,46 +1932,46 @@
 
 	switch(state.blendState[index].blendOperation)
 	{
-	case VK_BLEND_OP_ADD:
-		oC.x += pixel.x;
-		oC.y += pixel.y;
-		oC.z += pixel.z;
-		break;
-	case VK_BLEND_OP_SUBTRACT:
-		oC.x -= pixel.x;
-		oC.y -= pixel.y;
-		oC.z -= pixel.z;
-		break;
-	case VK_BLEND_OP_REVERSE_SUBTRACT:
-		oC.x = pixel.x - oC.x;
-		oC.y = pixel.y - oC.y;
-		oC.z = pixel.z - oC.z;
-		break;
-	case VK_BLEND_OP_MIN:
-		oC.x = Min(oC.x, pixel.x);
-		oC.y = Min(oC.y, pixel.y);
-		oC.z = Min(oC.z, pixel.z);
-		break;
-	case VK_BLEND_OP_MAX:
-		oC.x = Max(oC.x, pixel.x);
-		oC.y = Max(oC.y, pixel.y);
-		oC.z = Max(oC.z, pixel.z);
-		break;
-	case VK_BLEND_OP_SRC_EXT:
-		// No operation
-		break;
-	case VK_BLEND_OP_DST_EXT:
-		oC.x = pixel.x;
-		oC.y = pixel.y;
-		oC.z = pixel.z;
-		break;
-	case VK_BLEND_OP_ZERO_EXT:
-		oC.x = Float4(0.0f);
-		oC.y = Float4(0.0f);
-		oC.z = Float4(0.0f);
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
+		case VK_BLEND_OP_ADD:
+			oC.x += pixel.x;
+			oC.y += pixel.y;
+			oC.z += pixel.z;
+			break;
+		case VK_BLEND_OP_SUBTRACT:
+			oC.x -= pixel.x;
+			oC.y -= pixel.y;
+			oC.z -= pixel.z;
+			break;
+		case VK_BLEND_OP_REVERSE_SUBTRACT:
+			oC.x = pixel.x - oC.x;
+			oC.y = pixel.y - oC.y;
+			oC.z = pixel.z - oC.z;
+			break;
+		case VK_BLEND_OP_MIN:
+			oC.x = Min(oC.x, pixel.x);
+			oC.y = Min(oC.y, pixel.y);
+			oC.z = Min(oC.z, pixel.z);
+			break;
+		case VK_BLEND_OP_MAX:
+			oC.x = Max(oC.x, pixel.x);
+			oC.y = Max(oC.y, pixel.y);
+			oC.z = Max(oC.z, pixel.z);
+			break;
+		case VK_BLEND_OP_SRC_EXT:
+			// No operation
+			break;
+		case VK_BLEND_OP_DST_EXT:
+			oC.x = pixel.x;
+			oC.y = pixel.y;
+			oC.z = pixel.z;
+			break;
+		case VK_BLEND_OP_ZERO_EXT:
+			oC.x = Float4(0.0f);
+			oC.y = Float4(0.0f);
+			oC.z = Float4(0.0f);
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
 	}
 
 	blendFactorAlpha(sourceFactor, oC, pixel, state.blendState[index].sourceBlendFactorAlpha);
@@ -1981,33 +1982,33 @@
 
 	switch(state.blendState[index].blendOperationAlpha)
 	{
-	case VK_BLEND_OP_ADD:
-		oC.w += pixel.w;
-		break;
-	case VK_BLEND_OP_SUBTRACT:
-		oC.w -= pixel.w;
-		break;
-	case VK_BLEND_OP_REVERSE_SUBTRACT:
-		pixel.w -= oC.w;
-		oC.w = pixel.w;
-		break;
-	case VK_BLEND_OP_MIN:
-		oC.w = Min(oC.w, pixel.w);
-		break;
-	case VK_BLEND_OP_MAX:
-		oC.w = Max(oC.w, pixel.w);
-		break;
-	case VK_BLEND_OP_SRC_EXT:
-		// No operation
-		break;
-	case VK_BLEND_OP_DST_EXT:
-		oC.w = pixel.w;
-		break;
-	case VK_BLEND_OP_ZERO_EXT:
-		oC.w = Float4(0.0f);
-		break;
-	default:
-		UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
+		case VK_BLEND_OP_ADD:
+			oC.w += pixel.w;
+			break;
+		case VK_BLEND_OP_SUBTRACT:
+			oC.w -= pixel.w;
+			break;
+		case VK_BLEND_OP_REVERSE_SUBTRACT:
+			pixel.w -= oC.w;
+			oC.w = pixel.w;
+			break;
+		case VK_BLEND_OP_MIN:
+			oC.w = Min(oC.w, pixel.w);
+			break;
+		case VK_BLEND_OP_MAX:
+			oC.w = Max(oC.w, pixel.w);
+			break;
+		case VK_BLEND_OP_SRC_EXT:
+			// No operation
+			break;
+		case VK_BLEND_OP_DST_EXT:
+			oC.w = pixel.w;
+			break;
+		case VK_BLEND_OP_ZERO_EXT:
+			oC.w = Float4(0.0f);
+			break;
+		default:
+			UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
 	}
 }
 
@@ -2015,49 +2016,49 @@
 {
 	switch(state.targetFormat[index])
 	{
-	case VK_FORMAT_R16_SFLOAT:
-	case VK_FORMAT_R32_SFLOAT:
-	case VK_FORMAT_R32_SINT:
-	case VK_FORMAT_R32_UINT:
-	case VK_FORMAT_R16_SINT:
-	case VK_FORMAT_R16_UINT:
-	case VK_FORMAT_R8_SINT:
-	case VK_FORMAT_R8_UINT:
-	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
-		break;
-	case VK_FORMAT_R16G16_SFLOAT:
-	case VK_FORMAT_R32G32_SFLOAT:
-	case VK_FORMAT_R32G32_SINT:
-	case VK_FORMAT_R32G32_UINT:
-	case VK_FORMAT_R16G16_SINT:
-	case VK_FORMAT_R16G16_UINT:
-	case VK_FORMAT_R8G8_SINT:
-	case VK_FORMAT_R8G8_UINT:
-		oC.z = oC.x;
-		oC.x = UnpackLow(oC.x, oC.y);
-		oC.z = UnpackHigh(oC.z, oC.y);
-		oC.y = oC.z;
-		break;
-	case VK_FORMAT_R16G16B16A16_SFLOAT:
-	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
-	case VK_FORMAT_R32G32B32A32_SFLOAT:
-	case VK_FORMAT_R32G32B32A32_SINT:
-	case VK_FORMAT_R32G32B32A32_UINT:
-	case VK_FORMAT_R16G16B16A16_SINT:
-	case VK_FORMAT_R16G16B16A16_UINT:
-	case VK_FORMAT_R8G8B8A8_SINT:
-	case VK_FORMAT_R8G8B8A8_UINT:
-	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-		transpose4x4(oC.x, oC.y, oC.z, oC.w);
-		break;
-	default:
-		UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+		case VK_FORMAT_R16_SFLOAT:
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R16_SINT:
+		case VK_FORMAT_R16_UINT:
+		case VK_FORMAT_R8_SINT:
+		case VK_FORMAT_R8_UINT:
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+			break;
+		case VK_FORMAT_R16G16_SFLOAT:
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32_UINT:
+		case VK_FORMAT_R16G16_SINT:
+		case VK_FORMAT_R16G16_UINT:
+		case VK_FORMAT_R8G8_SINT:
+		case VK_FORMAT_R8G8_UINT:
+			oC.z = oC.x;
+			oC.x = UnpackLow(oC.x, oC.y);
+			oC.z = UnpackHigh(oC.z, oC.y);
+			oC.y = oC.z;
+			break;
+		case VK_FORMAT_R16G16B16A16_SFLOAT:
+		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+		case VK_FORMAT_R16G16B16A16_SINT:
+		case VK_FORMAT_R16G16B16A16_UINT:
+		case VK_FORMAT_R8G8B8A8_SINT:
+		case VK_FORMAT_R8G8B8A8_UINT:
+		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			transpose4x4(oC.x, oC.y, oC.z, oC.w);
+			break;
+		default:
+			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 	}
 
 	int rgbaWriteMask = state.colorWriteActive(index);
 
-	Int xMask;   // Combination of all masks
+	Int xMask;  // Combination of all masks
 
 	if(state.depthTestActive)
 	{
@@ -2080,520 +2081,520 @@
 
 	switch(targetFormat)
 	{
-	case VK_FORMAT_R32_SFLOAT:
-	case VK_FORMAT_R32_SINT:
-	case VK_FORMAT_R32_UINT:
-		if(rgbaWriteMask & 0x00000001)
-		{
-			buffer += 4 * x;
-
-			// FIXME: movlps
-			value.x = *Pointer<Float>(buffer + 0);
-			value.y = *Pointer<Float>(buffer + 4);
-
-			buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
-			// FIXME: movhps
-			value.z = *Pointer<Float>(buffer + 0);
-			value.w = *Pointer<Float>(buffer + 4);
-
-			oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
-			oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-
-			// FIXME: movhps
-			*Pointer<Float>(buffer + 0) = oC.x.z;
-			*Pointer<Float>(buffer + 4) = oC.x.w;
-
-			buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
-			// FIXME: movlps
-			*Pointer<Float>(buffer + 0) = oC.x.x;
-			*Pointer<Float>(buffer + 4) = oC.x.y;
-		}
-		break;
-	case VK_FORMAT_R16_SFLOAT:
-		if(rgbaWriteMask & 0x00000001)
-		{
-			buffer += 2 * x;
-
-			value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
-			value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
-
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
-			value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
-			value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
-
-			oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
-			oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-
-			*Pointer<Half>(buffer + 0) = Half(oC.x.z);
-			*Pointer<Half>(buffer + 2) = Half(oC.x.w);
-
-			buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
-			*Pointer<Half>(buffer + 0) = Half(oC.x.x);
-			*Pointer<Half>(buffer + 2) = Half(oC.x.y);
-		}
-		break;
-	case VK_FORMAT_R16_SINT:
-	case VK_FORMAT_R16_UINT:
-		if(rgbaWriteMask & 0x00000001)
-		{
-			buffer += 2 * x;
-
-			UShort4 xyzw;
-			xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
-
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
-			xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
-			value = As<Float4>(Int4(xyzw));
-
-			oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
-			oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-
-			if(targetFormat == VK_FORMAT_R16_SINT)
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32_UINT:
+			if(rgbaWriteMask & 0x00000001)
 			{
-				Float component = oC.x.z;
-				*Pointer<Short>(buffer + 0) = Short(As<Int>(component));
-				component = oC.x.w;
-				*Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+				buffer += 4 * x;
+
+				// FIXME: movlps
+				value.x = *Pointer<Float>(buffer + 0);
+				value.y = *Pointer<Float>(buffer + 4);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				// FIXME: movhps
+				value.z = *Pointer<Float>(buffer + 0);
+				value.w = *Pointer<Float>(buffer + 4);
+
+				oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
+				oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+
+				// FIXME: movhps
+				*Pointer<Float>(buffer + 0) = oC.x.z;
+				*Pointer<Float>(buffer + 4) = oC.x.w;
 
 				buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
-				component = oC.x.x;
-				*Pointer<Short>(buffer + 0) = Short(As<Int>(component));
-				component = oC.x.y;
-				*Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+				// FIXME: movlps
+				*Pointer<Float>(buffer + 0) = oC.x.x;
+				*Pointer<Float>(buffer + 4) = oC.x.y;
 			}
-			else // VK_FORMAT_R16_UINT
+			break;
+		case VK_FORMAT_R16_SFLOAT:
+			if(rgbaWriteMask & 0x00000001)
 			{
-				Float component = oC.x.z;
-				*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
-				component = oC.x.w;
-				*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+				buffer += 2 * x;
+
+				value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
+				value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
+				value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
+
+				oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
+				oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+
+				*Pointer<Half>(buffer + 0) = Half(oC.x.z);
+				*Pointer<Half>(buffer + 2) = Half(oC.x.w);
 
 				buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
-				component = oC.x.x;
-				*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
-				component = oC.x.y;
-				*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+				*Pointer<Half>(buffer + 0) = Half(oC.x.x);
+				*Pointer<Half>(buffer + 2) = Half(oC.x.y);
 			}
-		}
-		break;
-	case VK_FORMAT_R8_SINT:
-	case VK_FORMAT_R8_UINT:
-		if(rgbaWriteMask & 0x00000001)
-		{
-			buffer += x;
-
-			UInt xyzw, packedCol;
-
-			xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
-
-			Short4 tmpCol = Short4(As<Int4>(oC.x));
-			if(targetFormat == VK_FORMAT_R8_SINT)
+			break;
+		case VK_FORMAT_R16_SINT:
+		case VK_FORMAT_R16_UINT:
+			if(rgbaWriteMask & 0x00000001)
 			{
-				tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
+				buffer += 2 * x;
+
+				UShort4 xyzw;
+				xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
+				value = As<Float4>(Int4(xyzw));
+
+				oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
+				oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+
+				if(targetFormat == VK_FORMAT_R16_SINT)
+				{
+					Float component = oC.x.z;
+					*Pointer<Short>(buffer + 0) = Short(As<Int>(component));
+					component = oC.x.w;
+					*Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+
+					buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+					component = oC.x.x;
+					*Pointer<Short>(buffer + 0) = Short(As<Int>(component));
+					component = oC.x.y;
+					*Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+				}
+				else  // VK_FORMAT_R16_UINT
+				{
+					Float component = oC.x.z;
+					*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
+					component = oC.x.w;
+					*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+
+					buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+					component = oC.x.x;
+					*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
+					component = oC.x.y;
+					*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+				}
 			}
-			else
+			break;
+		case VK_FORMAT_R8_SINT:
+		case VK_FORMAT_R8_UINT:
+			if(rgbaWriteMask & 0x00000001)
 			{
-				tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
+				buffer += x;
+
+				UInt xyzw, packedCol;
+
+				xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
+
+				Short4 tmpCol = Short4(As<Int4>(oC.x));
+				if(targetFormat == VK_FORMAT_R8_SINT)
+				{
+					tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
+				}
+				else
+				{
+					tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
+				}
+				packedCol = Extract(As<Int2>(tmpCol), 0);
+
+				packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
+				            (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
+
+				*Pointer<UShort>(buffer) = UShort(packedCol >> 16);
+				buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				*Pointer<UShort>(buffer) = UShort(packedCol);
 			}
-			packedCol = Extract(As<Int2>(tmpCol), 0);
+			break;
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32_UINT:
+			buffer += 8 * x;
 
-			packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
-			            (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
+			value = *Pointer<Float4>(buffer);
 
-			*Pointer<UShort>(buffer) = UShort(packedCol >> 16);
-			buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			*Pointer<UShort>(buffer) = UShort(packedCol);
-		}
-		break;
-	case VK_FORMAT_R32G32_SFLOAT:
-	case VK_FORMAT_R32G32_SINT:
-	case VK_FORMAT_R32G32_UINT:
-		buffer += 8 * x;
-
-		value = *Pointer<Float4>(buffer);
-
-		if((rgbaWriteMask & 0x00000003) != 0x00000003)
-		{
-			Float4 masked = value;
-			oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
-			masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
-			oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
-		}
-
-		oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
-		value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
-		oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-		*Pointer<Float4>(buffer) = oC.x;
-
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
-		value = *Pointer<Float4>(buffer);
-
-		if((rgbaWriteMask & 0x00000003) != 0x00000003)
-		{
-			Float4 masked;
-
-			masked = value;
-			oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
-			masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
-			oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
-		}
-
-		oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
-		value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
-		oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
-		*Pointer<Float4>(buffer) = oC.y;
-		break;
-	case VK_FORMAT_R16G16_SFLOAT:
-		if((rgbaWriteMask & 0x00000003) != 0x0)
-		{
-			buffer += 4 * x;
-
-			UInt2 rgbaMask;
-			UInt2 packedCol;
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
-
-			UShort4 value = *Pointer<UShort4>(buffer);
-			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
-			if((rgbaWriteMask & 0x3) != 0x3)
-			{
-				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
-				rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
-				mergedMask &= rgbaMask;
-			}
-			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
-
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
-			value = *Pointer<UShort4>(buffer);
-			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
-			if((rgbaWriteMask & 0x3) != 0x3)
-			{
-				mergedMask &= rgbaMask;
-			}
-			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
-		}
-		break;
-	case VK_FORMAT_R16G16_SINT:
-	case VK_FORMAT_R16G16_UINT:
-		if((rgbaWriteMask & 0x00000003) != 0x0)
-		{
-			buffer += 4 * x;
-
-			UInt2 rgbaMask;
-			UShort4 packedCol = UShort4(As<Int4>(oC.x));
-			UShort4 value = *Pointer<UShort4>(buffer);
-			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
-			if((rgbaWriteMask & 0x3) != 0x3)
-			{
-				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
-				rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
-				mergedMask &= rgbaMask;
-			}
-			*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
-
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
-			packedCol = UShort4(As<Int4>(oC.y));
-			value = *Pointer<UShort4>(buffer);
-			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
-			if((rgbaWriteMask & 0x3) != 0x3)
-			{
-				mergedMask &= rgbaMask;
-			}
-			*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
-		}
-		break;
-	case VK_FORMAT_R8G8_SINT:
-	case VK_FORMAT_R8G8_UINT:
-		if((rgbaWriteMask & 0x00000003) != 0x0)
-		{
-			buffer += 2 * x;
-
-			Int2 xyzw, packedCol;
-
-			xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
-
-			if(targetFormat == VK_FORMAT_R8G8_SINT)
-			{
-				packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
-			}
-			else
-			{
-				packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
-			}
-
-			UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
-			if((rgbaWriteMask & 0x3) != 0x3)
-			{
-				Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
-				UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
-				mergedMask &= rgbaMask;
-			}
-
-			packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
-
-			*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
-			buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
-		}
-		break;
-	case VK_FORMAT_R32G32B32A32_SFLOAT:
-	case VK_FORMAT_R32G32B32A32_SINT:
-	case VK_FORMAT_R32G32B32A32_UINT:
-		buffer += 16 * x;
-
-		{
-			value = *Pointer<Float4>(buffer, 16);
-
-			if(rgbaWriteMask != 0x0000000F)
+			if((rgbaWriteMask & 0x00000003) != 0x00000003)
 			{
 				Float4 masked = value;
-				oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
-				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
+				oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
+				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
 				oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
 			}
 
-			oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
+			oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16, 16));
+			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ01X) + xMask * 16, 16));
 			oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-			*Pointer<Float4>(buffer, 16) = oC.x;
-		}
+			*Pointer<Float4>(buffer) = oC.x;
 
-		{
-			value = *Pointer<Float4>(buffer + 16, 16);
+			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
-			if(rgbaWriteMask != 0x0000000F)
+			value = *Pointer<Float4>(buffer);
+
+			if((rgbaWriteMask & 0x00000003) != 0x00000003)
 			{
-				Float4 masked = value;
-				oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
-				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
+				Float4 masked;
+
+				masked = value;
+				oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
+				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
 				oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
 			}
 
-			oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
+			oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16, 16));
+			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ23X) + xMask * 16, 16));
 			oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
-			*Pointer<Float4>(buffer + 16, 16) = oC.y;
-		}
-
-		buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
-		{
-			value = *Pointer<Float4>(buffer, 16);
-
-			if(rgbaWriteMask != 0x0000000F)
+			*Pointer<Float4>(buffer) = oC.y;
+			break;
+		case VK_FORMAT_R16G16_SFLOAT:
+			if((rgbaWriteMask & 0x00000003) != 0x0)
 			{
-				Float4 masked = value;
-				oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
-				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
-				oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
+				buffer += 4 * x;
+
+				UInt2 rgbaMask;
+				UInt2 packedCol;
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
+
+				UShort4 value = *Pointer<UShort4>(buffer);
+				UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+				if((rgbaWriteMask & 0x3) != 0x3)
+				{
+					Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
+					rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
+				value = *Pointer<UShort4>(buffer);
+				mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+				if((rgbaWriteMask & 0x3) != 0x3)
+				{
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+			}
+			break;
+		case VK_FORMAT_R16G16_SINT:
+		case VK_FORMAT_R16G16_UINT:
+			if((rgbaWriteMask & 0x00000003) != 0x0)
+			{
+				buffer += 4 * x;
+
+				UInt2 rgbaMask;
+				UShort4 packedCol = UShort4(As<Int4>(oC.x));
+				UShort4 value = *Pointer<UShort4>(buffer);
+				UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+				if((rgbaWriteMask & 0x3) != 0x3)
+				{
+					Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
+					rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				packedCol = UShort4(As<Int4>(oC.y));
+				value = *Pointer<UShort4>(buffer);
+				mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+				if((rgbaWriteMask & 0x3) != 0x3)
+				{
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+			}
+			break;
+		case VK_FORMAT_R8G8_SINT:
+		case VK_FORMAT_R8G8_UINT:
+			if((rgbaWriteMask & 0x00000003) != 0x0)
+			{
+				buffer += 2 * x;
+
+				Int2 xyzw, packedCol;
+
+				xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
+
+				if(targetFormat == VK_FORMAT_R8G8_SINT)
+				{
+					packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+				}
+				else
+				{
+					packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+				}
+
+				UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
+				if((rgbaWriteMask & 0x3) != 0x3)
+				{
+					Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
+					UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+					mergedMask &= rgbaMask;
+				}
+
+				packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
+
+				*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
+				buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
+			}
+			break;
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+			buffer += 16 * x;
+
+			{
+				value = *Pointer<Float4>(buffer, 16);
+
+				if(rgbaWriteMask != 0x0000000F)
+				{
+					Float4 masked = value;
+					oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+					masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+					oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
+				}
+
+				oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskX0X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX0X) + xMask * 16, 16));
+				oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+				*Pointer<Float4>(buffer, 16) = oC.x;
 			}
 
-			oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
-			oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
-			*Pointer<Float4>(buffer, 16) = oC.z;
-		}
-
-		{
-			value = *Pointer<Float4>(buffer + 16, 16);
-
-			if(rgbaWriteMask != 0x0000000F)
 			{
-				Float4 masked = value;
-				oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
-				masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
-				oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
-			}
+				value = *Pointer<Float4>(buffer + 16, 16);
 
-			oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
-			value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
-			oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
-			*Pointer<Float4>(buffer + 16, 16) = oC.w;
-		}
-		break;
-	case VK_FORMAT_R16G16B16A16_SFLOAT:
-		if((rgbaWriteMask & 0x0000000F) != 0x0)
-		{
-			buffer += 8 * x;
+				if(rgbaWriteMask != 0x0000000F)
+				{
+					Float4 masked = value;
+					oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+					masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+					oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
+				}
 
-			UInt4 rgbaMask;
-			UInt4 value = *Pointer<UInt4>(buffer);
-			UInt4 packedCol;
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
-			UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
-			if((rgbaWriteMask & 0xF) != 0xF)
-			{
-				UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
-				rgbaMask = UInt4(tmpMask, tmpMask);
-				mergedMask &= rgbaMask;
+				oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskX1X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX1X) + xMask * 16, 16));
+				oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
+				*Pointer<Float4>(buffer + 16, 16) = oC.y;
 			}
-			*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
 
 			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
-			value = *Pointer<UInt4>(buffer);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
-			packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
-			mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
-			if((rgbaWriteMask & 0xF) != 0xF)
 			{
-				mergedMask &= rgbaMask;
+				value = *Pointer<Float4>(buffer, 16);
+
+				if(rgbaWriteMask != 0x0000000F)
+				{
+					Float4 masked = value;
+					oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+					masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+					oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
+				}
+
+				oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskX2X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX2X) + xMask * 16, 16));
+				oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
+				*Pointer<Float4>(buffer, 16) = oC.z;
 			}
-			*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
-		}
-		break;
-	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
-		if((rgbaWriteMask & 0x7) != 0x0)
-		{
-			buffer += 4 * x;
 
-			unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) |
-								((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) |
-								((rgbaWriteMask & 0x4) ? 0xFFC00000 : 0);
-			UInt2 mergedMask(mask, mask);
-
-			UInt2 value;
-			value = Insert(value, r11g11b10Pack(oC.x), 0);
-			value = Insert(value, r11g11b10Pack(oC.y), 1);
-			*Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-			value = Insert(value, r11g11b10Pack(oC.z), 0);
-			value = Insert(value, r11g11b10Pack(oC.w), 1);
-			*Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
-		}
-		break;
-	case VK_FORMAT_R16G16B16A16_SINT:
-	case VK_FORMAT_R16G16B16A16_UINT:
-		if((rgbaWriteMask & 0x0000000F) != 0x0)
-		{
-			buffer += 8 * x;
-
-			UInt4 rgbaMask;
-			UShort8 value = *Pointer<UShort8>(buffer);
-			UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
-			UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
-			if((rgbaWriteMask & 0xF) != 0xF)
 			{
-				UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
-				rgbaMask = UInt4(tmpMask, tmpMask);
-				mergedMask &= rgbaMask;
+				value = *Pointer<Float4>(buffer + 16, 16);
+
+				if(rgbaWriteMask != 0x0000000F)
+				{
+					Float4 masked = value;
+					oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+					masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+					oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
+				}
+
+				oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskX3X) + xMask * 16, 16));
+				value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX3X) + xMask * 16, 16));
+				oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
+				*Pointer<Float4>(buffer + 16, 16) = oC.w;
 			}
-			*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
-
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
-			value = *Pointer<UShort8>(buffer);
-			packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
-			mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
-			if((rgbaWriteMask & 0xF) != 0xF)
+			break;
+		case VK_FORMAT_R16G16B16A16_SFLOAT:
+			if((rgbaWriteMask & 0x0000000F) != 0x0)
 			{
-				mergedMask &= rgbaMask;
-			}
-			*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
-		}
-		break;
-	case VK_FORMAT_R8G8B8A8_SINT:
-	case VK_FORMAT_R8G8B8A8_UINT:
-	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-		if((rgbaWriteMask & 0x0000000F) != 0x0)
-		{
-			UInt2 value, packedCol, mergedMask;
+				buffer += 8 * x;
 
-			buffer += 4 * x;
+				UInt4 rgbaMask;
+				UInt4 value = *Pointer<UInt4>(buffer);
+				UInt4 packedCol;
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
+				UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
+				if((rgbaWriteMask & 0xF) != 0xF)
+				{
+					UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+					rgbaMask = UInt4(tmpMask, tmpMask);
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
 
-			bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
-			if(isSigned)
-			{
-				packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+				value = *Pointer<UInt4>(buffer);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
+				packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
+				mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
+				if((rgbaWriteMask & 0xF) != 0xF)
+				{
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
 			}
-			else
+			break;
+		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+			if((rgbaWriteMask & 0x7) != 0x0)
 			{
-				packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
-			}
-			value = *Pointer<UInt2>(buffer, 16);
-			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
-			if(rgbaWriteMask != 0xF)
-			{
-				mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
-			}
-			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
+				buffer += 4 * x;
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) |
+				                    ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) |
+				                    ((rgbaWriteMask & 0x4) ? 0xFFC00000 : 0);
+				UInt2 mergedMask(mask, mask);
 
-			if(isSigned)
-			{
-				packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+				UInt2 value;
+				value = Insert(value, r11g11b10Pack(oC.x), 0);
+				value = Insert(value, r11g11b10Pack(oC.y), 1);
+				*Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				value = Insert(value, r11g11b10Pack(oC.z), 0);
+				value = Insert(value, r11g11b10Pack(oC.w), 1);
+				*Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
 			}
-			else
+			break;
+		case VK_FORMAT_R16G16B16A16_SINT:
+		case VK_FORMAT_R16G16B16A16_UINT:
+			if((rgbaWriteMask & 0x0000000F) != 0x0)
 			{
-				packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
-			}
-			value = *Pointer<UInt2>(buffer, 16);
-			mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
-			if(rgbaWriteMask != 0xF)
-			{
-				mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
-			}
-			*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
-		}
-		break;
-	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
-		if((rgbaWriteMask & 0x0000000F) != 0x0)
-		{
-			Int2 mergedMask, packedCol, value;
-			Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
-					((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
-					((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
-					((As<Int4>(oC.x) & Int4(0x3ff)));
+				buffer += 8 * x;
 
-			buffer += 4 * x;
-			value = *Pointer<Int2>(buffer, 16);
-			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
-			if(rgbaWriteMask != 0xF)
-			{
-				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
-			}
-			*Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
+				UInt4 rgbaMask;
+				UShort8 value = *Pointer<UShort8>(buffer);
+				UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
+				UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
+				if((rgbaWriteMask & 0xF) != 0xF)
+				{
+					UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+					rgbaMask = UInt4(tmpMask, tmpMask);
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
 
-			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
 
-			value = *Pointer<Int2>(buffer, 16);
-			mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
-			if(rgbaWriteMask != 0xF)
-			{
-				mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+				value = *Pointer<UShort8>(buffer);
+				packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
+				mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
+				if((rgbaWriteMask & 0xF) != 0xF)
+				{
+					mergedMask &= rgbaMask;
+				}
+				*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
 			}
-			*Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
-		}
-		break;
-	default:
-		UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
+			break;
+		case VK_FORMAT_R8G8B8A8_SINT:
+		case VK_FORMAT_R8G8B8A8_UINT:
+		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			if((rgbaWriteMask & 0x0000000F) != 0x0)
+			{
+				UInt2 value, packedCol, mergedMask;
+
+				buffer += 4 * x;
+
+				bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
+
+				if(isSigned)
+				{
+					packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+				}
+				else
+				{
+					packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+				}
+				value = *Pointer<UInt2>(buffer, 16);
+				mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+				if(rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
+				}
+				*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				if(isSigned)
+				{
+					packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+				}
+				else
+				{
+					packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+				}
+				value = *Pointer<UInt2>(buffer, 16);
+				mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+				if(rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
+				}
+				*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
+			}
+			break;
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+			if((rgbaWriteMask & 0x0000000F) != 0x0)
+			{
+				Int2 mergedMask, packedCol, value;
+				Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
+				              ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
+				              ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
+				              ((As<Int4>(oC.x) & Int4(0x3ff)));
+
+				buffer += 4 * x;
+				value = *Pointer<Int2>(buffer, 16);
+				mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+				if(rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+				}
+				*Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				value = *Pointer<Int2>(buffer, 16);
+				mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+				if(rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+				}
+				*Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
+			}
+			break;
+		default:
+			UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
 	}
 }
 
@@ -2604,7 +2605,7 @@
 
 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
 {
-	Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
+	Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16);
 
 	c.x = As<UShort4>(c.x) >> 4;
 	c.y = As<UShort4>(c.y) >> 4;
@@ -2637,7 +2638,7 @@
 
 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
 {
-	Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
+	Pointer<Byte> LUT = constants + OFFSET(Constants, linearToSRGB12_16);
 
 	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
 	c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
@@ -2655,7 +2656,7 @@
 	c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
 }
 
-Float4 PixelRoutine::sRGBtoLinear(const Float4 &x)   // Approximates x^2.2
+Float4 PixelRoutine::sRGBtoLinear(const Float4 &x)  // Approximates x^2.2
 {
 	Float4 linear = x * x;
 	linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 678d780..34722ad 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -26,16 +26,16 @@
 {
 public:
 	PixelRoutine(const PixelProcessor::State &state,
-		vk::PipelineLayout const *pipelineLayout,
-		SpirvShader const *spirvShader,
-		const vk::DescriptorSet::Bindings &descriptorSets);
+	             vk::PipelineLayout const *pipelineLayout,
+	             SpirvShader const *spirvShader,
+	             const vk::DescriptorSet::Bindings &descriptorSets);
 
 	virtual ~PixelRoutine();
 
 protected:
-	Float4 z[4]; // Multisampled z
-	Float4 w;    // Used as is
-	Float4 rhw;  // Reciprocal w
+	Float4 z[4];  // Multisampled z
+	Float4 w;     // Used as is
+	Float4 rhw;   // Reciprocal w
 
 	SpirvRoutine routine;
 	const vk::DescriptorSet::Bindings &descriptorSets;
@@ -43,7 +43,7 @@
 	// Depth output
 	Float4 oDepth;
 
-	virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) = 0;
+	virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]) = 0;
 	virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0;
 	virtual Bool alphaTest(Int cMask[4]) = 0;
 	virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0;
@@ -55,7 +55,7 @@
 
 	// Raster operations
 	void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x);
-	void writeColor(int index, const Pointer<Byte> &cBuffer, const Int& x, Vector4f& oC, const Int& sMask, const Int& zMask, const Int& cMask);
+	void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &oC, const Int &sMask, const Int &zMask, const Int &cMask);
 	void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4f &oC, const Int &x);
 	void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask);
 
@@ -93,4 +93,4 @@
 
 }  // namespace sw
 
-#endif   // sw_PixelRoutine_hpp
+#endif  // sw_PixelRoutine_hpp
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index b2c2268..10d848e 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -14,35 +14,35 @@
 
 #include "SamplerCore.hpp"
 
-#include "PixelRoutine.hpp"
 #include "Constants.hpp"
-#include "Vulkan/VkSampler.hpp"
+#include "PixelRoutine.hpp"
 #include "Vulkan/VkDebug.hpp"
+#include "Vulkan/VkSampler.hpp"
 
 #include <limits>
 
 namespace {
 
-void applySwizzle(VkComponentSwizzle swizzle, sw::Float4& f, const sw::Vector4f& c, bool integer)
+void applySwizzle(VkComponentSwizzle swizzle, sw::Float4 &f, const sw::Vector4f &c, bool integer)
 {
 	switch(swizzle)
 	{
-	case VK_COMPONENT_SWIZZLE_R:    f = c.x; break;
-	case VK_COMPONENT_SWIZZLE_G:    f = c.y; break;
-	case VK_COMPONENT_SWIZZLE_B:    f = c.z; break;
-	case VK_COMPONENT_SWIZZLE_A:    f = c.w; break;
-	case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
-	case VK_COMPONENT_SWIZZLE_ONE:
-		if(integer)
-		{
-			f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1));
-		}
-		else
-		{
-			f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f);
-		}
-		break;
-	default: ASSERT(false);
+		case VK_COMPONENT_SWIZZLE_R: f = c.x; break;
+		case VK_COMPONENT_SWIZZLE_G: f = c.y; break;
+		case VK_COMPONENT_SWIZZLE_B: f = c.z; break;
+		case VK_COMPONENT_SWIZZLE_A: f = c.w; break;
+		case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
+		case VK_COMPONENT_SWIZZLE_ONE:
+			if(integer)
+			{
+				f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1));
+			}
+			else
+			{
+				f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f);
+			}
+			break;
+		default: ASSERT(false);
 	}
 }
 
@@ -50,11 +50,13 @@
 
 namespace sw {
 
-SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state) : constants(constants), state(state)
+SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state)
+    : constants(constants)
+    , state(state)
 {
 }
 
-Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4& sampleId, SamplerFunction function)
+Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4 &sampleId, SamplerFunction function)
 {
 	Vector4f c;
 
@@ -119,7 +121,8 @@
 	{
 		lod = Float(0);
 	}
-	else UNREACHABLE("Sampler function %d", int(function));
+	else
+		UNREACHABLE("Sampler function %d", int(function));
 
 	if(function != Base && function != Fetch && function != Gather)
 	{
@@ -139,7 +142,7 @@
 			}
 
 			c.x = lod;
-		//	c.y contains unclamped LOD.
+			//	c.y contains unclamped LOD.
 
 			return c;
 		}
@@ -159,31 +162,85 @@
 		{
 			switch(state.textureFormat)
 			{
+				case VK_FORMAT_R5G6B5_UNORM_PACK16:
+					c.x *= Float4(1.0f / 0xF800);
+					c.y *= Float4(1.0f / 0xFC00);
+					c.z *= Float4(1.0f / 0xF800);
+					break;
+				case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+					c.x *= Float4(1.0f / 0xF000);
+					c.y *= Float4(1.0f / 0xF000);
+					c.z *= Float4(1.0f / 0xF000);
+					c.w *= Float4(1.0f / 0xF000);
+					break;
+				case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+					c.x *= Float4(1.0f / 0xF800);
+					c.y *= Float4(1.0f / 0xF800);
+					c.z *= Float4(1.0f / 0xF800);
+					c.w *= Float4(1.0f / 0x8000);
+					break;
+				case VK_FORMAT_R8_SNORM:
+				case VK_FORMAT_R8G8_SNORM:
+				case VK_FORMAT_R8G8B8A8_SNORM:
+				case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
+					c.x *= Float4(1.0f / 0x7F00);
+					c.y *= Float4(1.0f / 0x7F00);
+					c.z *= Float4(1.0f / 0x7F00);
+					c.w *= Float4(1.0f / 0x7F00);
+					break;
+				case VK_FORMAT_R8_UNORM:
+				case VK_FORMAT_R8G8_UNORM:
+				case VK_FORMAT_R8G8B8A8_UNORM:
+				case VK_FORMAT_B8G8R8A8_UNORM:
+				case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+				case VK_FORMAT_B8G8R8A8_SRGB:
+				case VK_FORMAT_R8G8B8A8_SRGB:
+				case VK_FORMAT_R8_SRGB:
+				case VK_FORMAT_R8G8_SRGB:
+					c.x *= Float4(1.0f / 0xFF00u);
+					c.y *= Float4(1.0f / 0xFF00u);
+					c.z *= Float4(1.0f / 0xFF00u);
+					c.w *= Float4(1.0f / 0xFF00u);
+					break;
+				default:
+					for(int component = 0; component < textureComponentCount(); component++)
+					{
+						c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
+					}
+			}
+		}
+	}
+	else  // 16-bit filtering.
+	{
+		Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, function);
+
+		switch(state.textureFormat)
+		{
 			case VK_FORMAT_R5G6B5_UNORM_PACK16:
-				c.x *= Float4(1.0f / 0xF800);
-				c.y *= Float4(1.0f / 0xFC00);
-				c.z *= Float4(1.0f / 0xF800);
+				c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
+				c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
+				c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
 				break;
 			case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
-				c.x *= Float4(1.0f / 0xF000);
-				c.y *= Float4(1.0f / 0xF000);
-				c.z *= Float4(1.0f / 0xF000);
-				c.w *= Float4(1.0f / 0xF000);
+				c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
+				c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
+				c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
+				c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
 				break;
 			case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-				c.x *= Float4(1.0f / 0xF800);
-				c.y *= Float4(1.0f / 0xF800);
-				c.z *= Float4(1.0f / 0xF800);
-				c.w *= Float4(1.0f / 0x8000);
+				c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
+				c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
+				c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
+				c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000);
 				break;
 			case VK_FORMAT_R8_SNORM:
 			case VK_FORMAT_R8G8_SNORM:
 			case VK_FORMAT_R8G8B8A8_SNORM:
 			case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
-				c.x *= Float4(1.0f / 0x7F00);
-				c.y *= Float4(1.0f / 0x7F00);
-				c.z *= Float4(1.0f / 0x7F00);
-				c.w *= Float4(1.0f / 0x7F00);
+				c.x = Float4(cs.x) * Float4(1.0f / 0x7F00);
+				c.y = Float4(cs.y) * Float4(1.0f / 0x7F00);
+				c.z = Float4(cs.z) * Float4(1.0f / 0x7F00);
+				c.w = Float4(cs.w) * Float4(1.0f / 0x7F00);
 				break;
 			case VK_FORMAT_R8_UNORM:
 			case VK_FORMAT_R8G8_UNORM:
@@ -194,77 +251,23 @@
 			case VK_FORMAT_R8G8B8A8_SRGB:
 			case VK_FORMAT_R8_SRGB:
 			case VK_FORMAT_R8G8_SRGB:
-				c.x *= Float4(1.0f / 0xFF00u);
-				c.y *= Float4(1.0f / 0xFF00u);
-				c.z *= Float4(1.0f / 0xFF00u);
-				c.w *= Float4(1.0f / 0xFF00u);
+				c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u);
+				c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u);
+				c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u);
+				c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u);
 				break;
 			default:
 				for(int component = 0; component < textureComponentCount(); component++)
 				{
-					c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
+					if(hasUnsignedTextureComponent(component))
+					{
+						convertUnsigned16(c[component], cs[component]);
+					}
+					else
+					{
+						convertSigned15(c[component], cs[component]);
+					}
 				}
-			}
-		}
-	}
-	else  // 16-bit filtering.
-	{
-		Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, function);
-
-		switch(state.textureFormat)
-		{
-		case VK_FORMAT_R5G6B5_UNORM_PACK16:
-			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
-			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
-			c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
-			break;
-		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
-			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
-			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
-			c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
-			c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
-			break;
-		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
-			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
-			c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
-			c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000);
-			break;
-		case VK_FORMAT_R8_SNORM:
-		case VK_FORMAT_R8G8_SNORM:
-		case VK_FORMAT_R8G8B8A8_SNORM:
-		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
-			c.x = Float4(cs.x) * Float4(1.0f / 0x7F00);
-			c.y = Float4(cs.y) * Float4(1.0f / 0x7F00);
-			c.z = Float4(cs.z) * Float4(1.0f / 0x7F00);
-			c.w = Float4(cs.w) * Float4(1.0f / 0x7F00);
-			break;
-		case VK_FORMAT_R8_UNORM:
-		case VK_FORMAT_R8G8_UNORM:
-		case VK_FORMAT_R8G8B8A8_UNORM:
-		case VK_FORMAT_B8G8R8A8_UNORM:
-		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-		case VK_FORMAT_B8G8R8A8_SRGB:
-		case VK_FORMAT_R8G8B8A8_SRGB:
-		case VK_FORMAT_R8_SRGB:
-		case VK_FORMAT_R8G8_SRGB:
-			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u);
-			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u);
-			c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u);
-			c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u);
-			break;
-		default:
-			for(int component = 0; component < textureComponentCount(); component++)
-			{
-				if(hasUnsignedTextureComponent(component))
-				{
-					convertUnsigned16(c[component], cs[component]);
-				}
-				else
-				{
-					convertSigned15(c[component], cs[component]);
-				}
-			}
 		}
 	}
 
@@ -321,27 +324,27 @@
 	{
 		switch(count)
 		{
-		case -1: return uvw - offset;
-		case  0: return uvw;
-		case +1: return uvw + offset;
-		case  2: return uvw + offset + offset;
+			case -1: return uvw - offset;
+			case 0: return uvw;
+			case +1: return uvw + offset;
+			case 2: return uvw + offset + offset;
 		}
 	}
-	else   // Clamp or mirror
+	else  // Clamp or mirror
 	{
 		switch(count)
 		{
-		case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
-		case  0: return uvw;
-		case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
-		case  2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
+			case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
+			case 0: return uvw;
+			case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
+			case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
 		}
 	}
 
 	return uvw;
 }
 
-Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
+Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
 {
 	Vector4s c = sampleAniso(texture, u, v, w, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, false, function);
 
@@ -356,21 +359,45 @@
 
 		lod *= Float(1 << 16);
 
-		UShort4 utri = UShort4(Float4(lod));   // FIXME: Optimize
-		Short4 stri = utri >> 1;   // FIXME: Optimize
+		UShort4 utri = UShort4(Float4(lod));  // FIXME: Optimize
+		Short4 stri = utri >> 1;              // FIXME: Optimize
 
-		if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
-		if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
-		if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
-		if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
+		if(hasUnsignedTextureComponent(0))
+			cc.x = MulHigh(As<UShort4>(cc.x), utri);
+		else
+			cc.x = MulHigh(cc.x, stri);
+		if(hasUnsignedTextureComponent(1))
+			cc.y = MulHigh(As<UShort4>(cc.y), utri);
+		else
+			cc.y = MulHigh(cc.y, stri);
+		if(hasUnsignedTextureComponent(2))
+			cc.z = MulHigh(As<UShort4>(cc.z), utri);
+		else
+			cc.z = MulHigh(cc.z, stri);
+		if(hasUnsignedTextureComponent(3))
+			cc.w = MulHigh(As<UShort4>(cc.w), utri);
+		else
+			cc.w = MulHigh(cc.w, stri);
 
 		utri = ~utri;
 		stri = Short4(0x7FFF) - stri;
 
-		if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
-		if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
-		if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
-		if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
+		if(hasUnsignedTextureComponent(0))
+			c.x = MulHigh(As<UShort4>(c.x), utri);
+		else
+			c.x = MulHigh(c.x, stri);
+		if(hasUnsignedTextureComponent(1))
+			c.y = MulHigh(As<UShort4>(c.y), utri);
+		else
+			c.y = MulHigh(c.y, stri);
+		if(hasUnsignedTextureComponent(2))
+			c.z = MulHigh(As<UShort4>(c.z), utri);
+		else
+			c.z = MulHigh(c.z, stri);
+		if(hasUnsignedTextureComponent(3))
+			c.w = MulHigh(As<UShort4>(c.w), utri);
+		else
+			c.w = MulHigh(c.w, stri);
 
 		c.x += cc.x;
 		c.y += cc.y;
@@ -386,7 +413,7 @@
 	return c;
 }
 
-Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
 {
 	Vector4s c;
 
@@ -405,9 +432,9 @@
 		cSum.z = Short4(0);
 		cSum.w = Short4(0);
 
-		Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
-		Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
-		UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
+		Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * a);
+		Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * a);
+		UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants, cWeight) + 8 * a);
 		Short4 sw = Short4(cw >> 1);
 
 		Float4 du = uDelta;
@@ -428,25 +455,49 @@
 			u0 += du;
 			v0 += dv;
 
-			if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
-			if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
-			if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
-			if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
+			if(hasUnsignedTextureComponent(0))
+				cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw));
+			else
+				cSum.x += MulHigh(c.x, sw);
+			if(hasUnsignedTextureComponent(1))
+				cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw));
+			else
+				cSum.y += MulHigh(c.y, sw);
+			if(hasUnsignedTextureComponent(2))
+				cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw));
+			else
+				cSum.z += MulHigh(c.z, sw);
+			if(hasUnsignedTextureComponent(3))
+				cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw));
+			else
+				cSum.w += MulHigh(c.w, sw);
 
 			i++;
 		}
 		Until(i >= a);
 
-		if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
-		if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
-		if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
-		if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
+		if(hasUnsignedTextureComponent(0))
+			c.x = cSum.x;
+		else
+			c.x = AddSat(cSum.x, cSum.x);
+		if(hasUnsignedTextureComponent(1))
+			c.y = cSum.y;
+		else
+			c.y = AddSat(cSum.y, cSum.y);
+		if(hasUnsignedTextureComponent(2))
+			c.z = cSum.z;
+		else
+			c.z = AddSat(cSum.z, cSum.z);
+		if(hasUnsignedTextureComponent(3))
+			c.w = cSum.w;
+		else
+			c.w = AddSat(cSum.w, cSum.w);
 	}
 
 	return c;
 }
 
-Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
 {
 	if(state.textureType != VK_IMAGE_VIEW_TYPE_3D)
 	{
@@ -458,7 +509,7 @@
 	}
 }
 
-Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
 {
 	Vector4s c;
 
@@ -487,21 +538,21 @@
 	}
 	else
 	{
-		Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
-		Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
-		Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
-		Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
+		Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
+		Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
+		Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
+		Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
 
 		Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
 		Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
 		Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
 		Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
 
-		if(!gather)   // Blend
+		if(!gather)  // Blend
 		{
 			// Fractions
-			UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,width)));
-			UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,height)));
+			UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width)));
+			UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height)));
 
 			UShort4 f1u = ~f0u;
 			UShort4 f1v = ~f0v;
@@ -532,7 +583,7 @@
 				{
 					c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u);
 					c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u);
-					c.x   = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
+					c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
 				}
 				else
 				{
@@ -552,7 +603,7 @@
 					}
 
 					c.x = (c00.x + c10.x) + (c01.x + c11.x);
-					if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);   // Correct for signed fractions
+					if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);  // Correct for signed fractions
 				}
 			}
 
@@ -562,7 +613,7 @@
 				{
 					c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u);
 					c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u);
-					c.y   = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
+					c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
 				}
 				else
 				{
@@ -582,7 +633,7 @@
 					}
 
 					c.y = (c00.y + c10.y) + (c01.y + c11.y);
-					if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);   // Correct for signed fractions
+					if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);  // Correct for signed fractions
 				}
 			}
 
@@ -592,7 +643,7 @@
 				{
 					c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u);
 					c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u);
-					c.z   = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
+					c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
 				}
 				else
 				{
@@ -612,7 +663,7 @@
 					}
 
 					c.z = (c00.z + c10.z) + (c01.z + c11.z);
-					if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);   // Correct for signed fractions
+					if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);  // Correct for signed fractions
 				}
 			}
 
@@ -622,7 +673,7 @@
 				{
 					c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u);
 					c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u);
-					c.w  = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
+					c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
 				}
 				else
 				{
@@ -642,7 +693,7 @@
 					}
 
 					c.w = (c00.w + c10.w) + (c01.w + c11.w);
-					if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);   // Correct for signed fractions
+					if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);  // Correct for signed fractions
 				}
 			}
 		}
@@ -651,16 +702,16 @@
 			VkComponentSwizzle swizzle = gatherSwizzle();
 			switch(swizzle)
 			{
-			case VK_COMPONENT_SWIZZLE_ZERO:
-			case VK_COMPONENT_SWIZZLE_ONE:
-				// Handled at the final component swizzle.
-				break;
-			default:
-				c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
-				c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
-				c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
-				c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
-				break;
+				case VK_COMPONENT_SWIZZLE_ZERO:
+				case VK_COMPONENT_SWIZZLE_ONE:
+					// Handled at the final component swizzle.
+					break;
+				default:
+					c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+					break;
 			}
 		}
 	}
@@ -668,7 +719,7 @@
 	return c;
 }
 
-Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
 {
 	Vector4s c_;
 
@@ -708,17 +759,17 @@
 			{
 				for(int k = 0; k < 2; k++)
 				{
-					u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
-					v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
-					s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
+					u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
+					v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
+					s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap, wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
 				}
 			}
 		}
 
 		// Fractions
-		UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,width)));
-		UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,height)));
-		UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,depth)));
+		UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width)));
+		UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height)));
+		UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, depth)));
 
 		UShort4 f1u = ~f0u;
 		UShort4 f1v = ~f0v;
@@ -766,10 +817,34 @@
 				{
 					c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, cubeArrayId, sampleId, buffer, function);
 
-					if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
-					if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
-					if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
-					if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
+					if(componentCount >= 1)
+					{
+						if(hasUnsignedTextureComponent(0))
+							c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]);
+						else
+							c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]);
+					}
+					if(componentCount >= 2)
+					{
+						if(hasUnsignedTextureComponent(1))
+							c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]);
+						else
+							c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]);
+					}
+					if(componentCount >= 3)
+					{
+						if(hasUnsignedTextureComponent(2))
+							c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]);
+						else
+							c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]);
+					}
+					if(componentCount >= 4)
+					{
+						if(hasUnsignedTextureComponent(3))
+							c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]);
+						else
+							c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]);
+					}
 
 					if(i != 0 || j != 0 || k != 0)
 					{
@@ -788,16 +863,20 @@
 		if(componentCount >= 4) c_.w = c[0][0][0].w;
 
 		// Correct for signed fractions
-		if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
-		if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
-		if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
-		if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
+		if(componentCount >= 1)
+			if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
+		if(componentCount >= 2)
+			if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
+		if(componentCount >= 3)
+			if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
+		if(componentCount >= 4)
+			if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
 	}
 
 	return c_;
 }
 
-Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
+Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
 {
 	Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, false, function);
 
@@ -821,7 +900,7 @@
 	return c;
 }
 
-Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
 {
 	Vector4f c;
 
@@ -840,8 +919,8 @@
 		cSum.z = Float4(0.0f);
 		cSum.w = Float4(0.0f);
 
-		Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
-		Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
+		Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * a);
+		Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * a);
 
 		Float4 du = uDelta;
 		Float4 dv = vDelta;
@@ -879,7 +958,7 @@
 	return c;
 }
 
-Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
 {
 	if(state.textureType != VK_IMAGE_VIEW_TYPE_3D)
 	{
@@ -891,7 +970,7 @@
 	}
 }
 
-Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
 {
 	Vector4f c;
 
@@ -935,7 +1014,7 @@
 		Vector4f c01 = sampleTexel(x0, y1, z0, q, mipmap, cubeArrayId, sampleId, buffer, function);
 		Vector4f c11 = sampleTexel(x1, y1, z0, q, mipmap, cubeArrayId, sampleId, buffer, function);
 
-		if(!gather)   // Blend
+		if(!gather)  // Blend
 		{
 			if(componentCount >= 1) c00.x = c00.x + fu * (c10.x - c00.x);
 			if(componentCount >= 2) c00.y = c00.y + fu * (c10.y - c00.y);
@@ -957,16 +1036,16 @@
 			VkComponentSwizzle swizzle = gatherSwizzle();
 			switch(swizzle)
 			{
-			case VK_COMPONENT_SWIZZLE_ZERO:
-			case VK_COMPONENT_SWIZZLE_ONE:
-				// Handled at the final component swizzle.
-				break;
-			default:
-				c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
-				c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
-				c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
-				c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
-				break;
+				case VK_COMPONENT_SWIZZLE_ZERO:
+				case VK_COMPONENT_SWIZZLE_ONE:
+					// Handled at the final component swizzle.
+					break;
+				default:
+					c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+					break;
 			}
 		}
 	}
@@ -974,7 +1053,7 @@
 	return c;
 }
 
-Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
 {
 	Vector4f c;
 
@@ -1065,18 +1144,18 @@
 Float SamplerCore::log2sqrt(Float lod)
 {
 	// log2(sqrt(lod))                               // Equals 0.25 * log2(lod^2).
-	lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
-	lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
-	lod *= As<Float>(Int(0x33000000));               // Scale by 0.25 * 2^-23 (mantissa length).
+	lod *= lod;                                     // Squaring doubles the exponent and produces an extra bit of precision.
+	lod = Float(As<Int>(lod)) - Float(0x3F800000);  // Interpret as integer and subtract the exponent bias.
+	lod *= As<Float>(Int(0x33000000));              // Scale by 0.25 * 2^-23 (mantissa length).
 
 	return lod;
 }
 
 Float SamplerCore::log2(Float lod)
 {
-	lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
-	lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
-	lod *= As<Float>(Int(0x33800000));               // Scale by 0.5 * 2^-23 (mantissa length).
+	lod *= lod;                                     // Squaring doubles the exponent and produces an extra bit of precision.
+	lod = Float(As<Int>(lod)) - Float(0x3F800000);  // Interpret as integer and subtract the exponent bias.
+	lod *= As<Float>(Int(0x33800000));              // Scale by 0.5 * 2^-23 (mantissa length).
 
 	return lod;
 }
@@ -1085,7 +1164,7 @@
 {
 	Float4 duvdxy;
 
-	if(function != Grad)   // Implicit
+	if(function != Grad)  // Implicit
 	{
 		duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
 	}
@@ -1103,7 +1182,7 @@
 	Float4 dUV2dxy = dUVdxy * dUVdxy;
 	Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
 
-	lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
+	lod = Max(Float(dUV2.x), Float(dUV2.y));  // Square length of major axis
 
 	if(state.textureFilter == FILTER_ANISOTROPIC)
 	{
@@ -1119,12 +1198,12 @@
 		vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
 
 		anisotropy = lod * Rcp_pp(det);
-		anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler,maxAnisotropy)));
+		anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler, maxAnisotropy)));
 
 		lod *= Rcp_pp(anisotropy * anisotropy);
 	}
 
-	lod = log2sqrt(lod);   // log2(sqrt(lod))
+	lod = log2sqrt(lod);  // log2(sqrt(lod))
 }
 
 void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function)
@@ -1160,10 +1239,10 @@
 
 	dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);
 
-	lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
+	lod = Max(Float(dudxy.y), Float(dudxy.z));  // FIXME: Max(dudxy.y, dudxy.z);
 
 	// Scale by texture dimension.
-	lod *= *Pointer<Float>(texture + OFFSET(Texture,width));
+	lod *= *Pointer<Float>(texture + OFFSET(Texture, width));
 
 	lod = log2(lod);
 }
@@ -1172,7 +1251,7 @@
 {
 	Float4 dudxy, dvdxy, dsdxy;
 
-	if(function != Grad)   // Implicit
+	if(function != Grad)  // Implicit
 	{
 		dudxy = uuuu - uuuu.xxxx;
 		dvdxy = vvvv - vvvv.xxxx;
@@ -1197,9 +1276,9 @@
 	dudxy += dvdxy;
 	dudxy += dsdxy;
 
-	lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
+	lod = Max(Float(dudxy.y), Float(dudxy.z));  // FIXME: Max(dudxy.y, dudxy.z);
 
-	lod = log2sqrt(lod);   // log2(sqrt(lod))
+	lod = log2sqrt(lod);  // log2(sqrt(lod))
 }
 
 Int4 SamplerCore::cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
@@ -1207,20 +1286,20 @@
 	// TODO: Comply with Vulkan recommendation:
 	// Vulkan 1.1: "The rules should have as the first rule that rz wins over ry and rx, and the second rule that ry wins over rx."
 
-	Int4 xn = CmpLT(x, Float4(0.0f));   // x < 0
-	Int4 yn = CmpLT(y, Float4(0.0f));   // y < 0
-	Int4 zn = CmpLT(z, Float4(0.0f));   // z < 0
+	Int4 xn = CmpLT(x, Float4(0.0f));  // x < 0
+	Int4 yn = CmpLT(y, Float4(0.0f));  // y < 0
+	Int4 zn = CmpLT(z, Float4(0.0f));  // z < 0
 
 	Float4 absX = Abs(x);
 	Float4 absY = Abs(y);
 	Float4 absZ = Abs(z);
 
-	Int4 xy = CmpNLE(absX, absY);   // abs(x) > abs(y)
-	Int4 yz = CmpNLE(absY, absZ);   // abs(y) > abs(z)
-	Int4 zx = CmpNLE(absZ, absX);   // abs(z) > abs(x)
-	Int4 xMajor = xy & ~zx;   // abs(x) > abs(y) && abs(x) > abs(z)
-	Int4 yMajor = yz & ~xy;   // abs(y) > abs(z) && abs(y) > abs(x)
-	Int4 zMajor = zx & ~yz;   // abs(z) > abs(x) && abs(z) > abs(y)
+	Int4 xy = CmpNLE(absX, absY);  // abs(x) > abs(y)
+	Int4 yz = CmpNLE(absY, absZ);  // abs(y) > abs(z)
+	Int4 zx = CmpNLE(absZ, absX);  // abs(z) > abs(x)
+	Int4 xMajor = xy & ~zx;        // abs(x) > abs(y) && abs(x) > abs(z)
+	Int4 yMajor = yz & ~xy;        // abs(y) > abs(z) && abs(y) > abs(x)
+	Int4 zMajor = zx & ~yz;        // abs(z) > abs(x) && abs(z) > abs(y)
 
 	// FACE_POSITIVE_X = 000b
 	// FACE_NEGATIVE_X = 001b
@@ -1235,14 +1314,14 @@
 	Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
 	Int negative = SignMask(n);
 
-	Int faces = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
-	faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
-	faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
+	Int faces = *Pointer<Int>(constants + OFFSET(Constants, transposeBit0) + negative * 4);
+	faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit1) + yAxis * 4);
+	faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit2) + zAxis * 4);
 
 	Int4 face;
 	face.x = faces & 0x7;
-	face.y = (faces >> 4)  & 0x7;
-	face.z = (faces >> 8)  & 0x7;
+	face.y = (faces >> 4) & 0x7;
+	face.z = (faces >> 8) & 0x7;
 	face.w = (faces >> 12) & 0x7;
 
 	M = Max(Max(absX, absY), Max(absZ, Float4(std::numeric_limits<float>::min())));
@@ -1267,27 +1346,27 @@
 
 	switch(mode)
 	{
-	case AddressingMode::ADDRESSING_WRAP:
-		tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd;
-		break;
-	case AddressingMode::ADDRESSING_CLAMP:
-	case AddressingMode::ADDRESSING_MIRROR:
-	case AddressingMode::ADDRESSING_MIRRORONCE:
-	case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
-		tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
-		break;
-	case ADDRESSING_TEXELFETCH:
-		break;
-	case AddressingMode::ADDRESSING_SEAMLESS:
-		ASSERT(false);   // Cube sampling doesn't support offset.
-	default:
-		ASSERT(false);
+		case AddressingMode::ADDRESSING_WRAP:
+			tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd;
+			break;
+		case AddressingMode::ADDRESSING_CLAMP:
+		case AddressingMode::ADDRESSING_MIRROR:
+		case AddressingMode::ADDRESSING_MIRRORONCE:
+		case AddressingMode::ADDRESSING_BORDER:  // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
+			tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
+			break;
+		case ADDRESSING_TEXELFETCH:
+			break;
+		case AddressingMode::ADDRESSING_SEAMLESS:
+			ASSERT(false);  // Cube sampling doesn't support offset.
+		default:
+			ASSERT(false);
 	}
 
 	return As<Short4>(UShort4(tmp));
 }
 
-void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, SamplerFunction function)
+void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function)
 {
 	bool texelFetch = (function == Fetch);
 	bool hasOffset = (function.offset != 0);
@@ -1309,8 +1388,8 @@
 	Short4 uuu2 = uuuu;
 	uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
 	uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
-	uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
-	uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
+	uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP))));
+	uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP))));
 
 	if(hasThirdCoordinate())
 	{
@@ -1380,7 +1459,7 @@
 	}
 }
 
-void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, SamplerFunction function)
+void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function)
 {
 	UInt4 indices = uuuu + vvvv;
 
@@ -1426,32 +1505,32 @@
 
 		switch(state.textureFormat)
 		{
-		case VK_FORMAT_R5G6B5_UNORM_PACK16:
-			c.z = (c.x & Short4(0x001Fu)) << 11;
-			c.y = (c.x & Short4(0x07E0u)) << 5;
-			c.x = (c.x & Short4(0xF800u));
-			break;
-		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
-			c.w = (c.x << 12) & Short4(0xF000u);
-			c.z = (c.x) & Short4(0xF000u);
-			c.y = (c.x << 4) & Short4(0xF000u);
-			c.x = (c.x << 8) & Short4(0xF000u);
-			break;
-		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-			c.w = (c.x) & Short4(0x8000u);
-			c.z = (c.x << 11) & Short4(0xF800u);
-			c.y = (c.x << 6) & Short4(0xF800u);
-			c.x = (c.x << 1) & Short4(0xF800u);
-			break;
-		default:
-			ASSERT(false);
+			case VK_FORMAT_R5G6B5_UNORM_PACK16:
+				c.z = (c.x & Short4(0x001Fu)) << 11;
+				c.y = (c.x & Short4(0x07E0u)) << 5;
+				c.x = (c.x & Short4(0xF800u));
+				break;
+			case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+				c.w = (c.x << 12) & Short4(0xF000u);
+				c.z = (c.x) & Short4(0xF000u);
+				c.y = (c.x << 4) & Short4(0xF000u);
+				c.x = (c.x << 8) & Short4(0xF000u);
+				break;
+			case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+				c.w = (c.x) & Short4(0x8000u);
+				c.z = (c.x << 11) & Short4(0xF800u);
+				c.y = (c.x << 6) & Short4(0xF800u);
+				c.x = (c.x << 1) & Short4(0xF800u);
+				break;
+			default:
+				ASSERT(false);
 		}
 	}
 	else if(has8bitTextureComponents())
 	{
 		switch(textureComponentCount())
 		{
-		case 4:
+			case 4:
 			{
 				Byte4 c0 = Pointer<Byte4>(buffer)[index[0]];
 				Byte4 c1 = Pointer<Byte4>(buffer)[index[1]];
@@ -1462,80 +1541,80 @@
 
 				switch(state.textureFormat)
 				{
-				case VK_FORMAT_B8G8R8A8_UNORM:
-				case VK_FORMAT_B8G8R8A8_SRGB:
-					c.z = As<Short4>(UnpackLow(c.x, c.y));
-					c.x = As<Short4>(UnpackHigh(c.x, c.y));
-					c.y = c.z;
-					c.w = c.x;
-					c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
-					c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
-					c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
-					c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
-					break;
-				case VK_FORMAT_R8G8B8A8_UNORM:
-				case VK_FORMAT_R8G8B8A8_SINT:
-				case VK_FORMAT_R8G8B8A8_SNORM:
-				case VK_FORMAT_R8G8B8A8_SRGB:
-					c.z = As<Short4>(UnpackHigh(c.x, c.y));
-					c.x = As<Short4>(UnpackLow(c.x, c.y));
-					c.y = c.x;
-					c.w = c.z;
-					c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
-					c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
-					c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
-					c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
-					// Propagate sign bit
-					if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT)
-					{
-						c.x >>= 8;
-						c.y >>= 8;
-						c.z >>= 8;
-						c.w >>= 8;
-					}
-					break;
-				case VK_FORMAT_R8G8B8A8_UINT:
-					c.z = As<Short4>(UnpackHigh(c.x, c.y));
-					c.x = As<Short4>(UnpackLow(c.x, c.y));
-					c.y = c.x;
-					c.w = c.z;
-					c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
-					c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
-					c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
-					c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
-					break;
-				default:
-					ASSERT(false);
+					case VK_FORMAT_B8G8R8A8_UNORM:
+					case VK_FORMAT_B8G8R8A8_SRGB:
+						c.z = As<Short4>(UnpackLow(c.x, c.y));
+						c.x = As<Short4>(UnpackHigh(c.x, c.y));
+						c.y = c.z;
+						c.w = c.x;
+						c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
+						c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
+						c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
+						c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
+						break;
+					case VK_FORMAT_R8G8B8A8_UNORM:
+					case VK_FORMAT_R8G8B8A8_SINT:
+					case VK_FORMAT_R8G8B8A8_SNORM:
+					case VK_FORMAT_R8G8B8A8_SRGB:
+						c.z = As<Short4>(UnpackHigh(c.x, c.y));
+						c.x = As<Short4>(UnpackLow(c.x, c.y));
+						c.y = c.x;
+						c.w = c.z;
+						c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
+						c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
+						c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
+						c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
+						// Propagate sign bit
+						if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT)
+						{
+							c.x >>= 8;
+							c.y >>= 8;
+							c.z >>= 8;
+							c.w >>= 8;
+						}
+						break;
+					case VK_FORMAT_R8G8B8A8_UINT:
+						c.z = As<Short4>(UnpackHigh(c.x, c.y));
+						c.x = As<Short4>(UnpackLow(c.x, c.y));
+						c.y = c.x;
+						c.w = c.z;
+						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
+						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
+						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
+						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
+						break;
+					default:
+						ASSERT(false);
 				}
 			}
 			break;
-		case 2:
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
+			case 2:
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
 
-			switch(state.textureFormat)
-			{
-			case VK_FORMAT_R8G8_UNORM:
-			case VK_FORMAT_R8G8_SNORM:
-			case VK_FORMAT_R8G8_SRGB:
-				c.y = (c.x & Short4(0xFF00u));
-				c.x = (c.x << 8);
+				switch(state.textureFormat)
+				{
+					case VK_FORMAT_R8G8_UNORM:
+					case VK_FORMAT_R8G8_SNORM:
+					case VK_FORMAT_R8G8_SRGB:
+						c.y = (c.x & Short4(0xFF00u));
+						c.x = (c.x << 8);
+						break;
+					case VK_FORMAT_R8G8_SINT:
+						c.y = c.x >> 8;
+						c.x = (c.x << 8) >> 8;  // Propagate sign bit
+						break;
+					case VK_FORMAT_R8G8_UINT:
+						c.y = As<Short4>(As<UShort4>(c.x) >> 8);
+						c.x &= Short4(0x00FFu);
+						break;
+					default:
+						ASSERT(false);
+				}
 				break;
-			case VK_FORMAT_R8G8_SINT:
-				c.y = c.x >> 8;
-				c.x = (c.x << 8) >> 8; // Propagate sign bit
-				break;
-			case VK_FORMAT_R8G8_UINT:
-				c.y = As<Short4>(As<UShort4>(c.x) >> 8);
-				c.x &= Short4(0x00FFu);
-				break;
-			default:
-				ASSERT(false);
-			}
-			break;
-		case 1:
+			case 1:
 			{
 				Int c0 = Int(*Pointer<Byte>(buffer + index[0]));
 				Int c1 = Int(*Pointer<Byte>(buffer + index[1]));
@@ -1545,9 +1624,9 @@
 
 				switch(state.textureFormat)
 				{
-				case VK_FORMAT_R8_SINT:
-				case VK_FORMAT_R8_UINT:
-				case VK_FORMAT_S8_UINT:
+					case VK_FORMAT_R8_SINT:
+					case VK_FORMAT_R8_UINT:
+					case VK_FORMAT_S8_UINT:
 					{
 						Int zero(0);
 						c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
@@ -1558,58 +1637,58 @@
 						}
 					}
 					break;
-				case VK_FORMAT_R8_SNORM:
-				case VK_FORMAT_R8_UNORM:
-				case VK_FORMAT_R8_SRGB:
-					// TODO: avoid populating the low bits at all.
-					c.x = Unpack(As<Byte4>(c0));
-					c.x &= Short4(0xFF00u);
-					break;
-				default:
-					c.x = Unpack(As<Byte4>(c0));
-					break;
+					case VK_FORMAT_R8_SNORM:
+					case VK_FORMAT_R8_UNORM:
+					case VK_FORMAT_R8_SRGB:
+						// TODO: avoid populating the low bits at all.
+						c.x = Unpack(As<Byte4>(c0));
+						c.x &= Short4(0xFF00u);
+						break;
+					default:
+						c.x = Unpack(As<Byte4>(c0));
+						break;
 				}
 			}
 			break;
-		default:
-			ASSERT(false);
+			default:
+				ASSERT(false);
 		}
 	}
 	else if(has16bitTextureComponents())
 	{
 		switch(textureComponentCount())
 		{
-		case 4:
-			c.x = Pointer<Short4>(buffer)[index[0]];
-			c.y = Pointer<Short4>(buffer)[index[1]];
-			c.z = Pointer<Short4>(buffer)[index[2]];
-			c.w = Pointer<Short4>(buffer)[index[3]];
-			transpose4x4(c.x, c.y, c.z, c.w);
-			break;
-		case 3:
-			c.x = Pointer<Short4>(buffer)[index[0]];
-			c.y = Pointer<Short4>(buffer)[index[1]];
-			c.z = Pointer<Short4>(buffer)[index[2]];
-			c.w = Pointer<Short4>(buffer)[index[3]];
-			transpose4x3(c.x, c.y, c.z, c.w);
-			break;
-		case 2:
-			c.x = *Pointer<Short4>(buffer + 4 * index[0]);
-			c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
-			c.z = *Pointer<Short4>(buffer + 4 * index[2]);
-			c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
-			c.y = c.x;
-			c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
-			c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
-			break;
-		case 1:
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
-			c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
-			break;
-		default:
-			ASSERT(false);
+			case 4:
+				c.x = Pointer<Short4>(buffer)[index[0]];
+				c.y = Pointer<Short4>(buffer)[index[1]];
+				c.z = Pointer<Short4>(buffer)[index[2]];
+				c.w = Pointer<Short4>(buffer)[index[3]];
+				transpose4x4(c.x, c.y, c.z, c.w);
+				break;
+			case 3:
+				c.x = Pointer<Short4>(buffer)[index[0]];
+				c.y = Pointer<Short4>(buffer)[index[1]];
+				c.z = Pointer<Short4>(buffer)[index[2]];
+				c.w = Pointer<Short4>(buffer)[index[3]];
+				transpose4x3(c.x, c.y, c.z, c.w);
+				break;
+			case 2:
+				c.x = *Pointer<Short4>(buffer + 4 * index[0]);
+				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
+				c.z = *Pointer<Short4>(buffer + 4 * index[2]);
+				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
+				c.y = c.x;
+				c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
+				c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
+				break;
+			case 1:
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
+				break;
+			default:
+				ASSERT(false);
 		}
 	}
 	else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32)
@@ -1639,12 +1718,13 @@
 		cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
 		cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
 
-		c.x = Short4(((cc) & Int4(0x3FF)));
+		c.x = Short4(((cc)&Int4(0x3FF)));
 		c.y = Short4(((cc >> 10) & Int4(0x3FF)));
 		c.z = Short4(((cc >> 20) & Int4(0x3FF)));
 		c.w = Short4(((cc >> 30) & Int4(0x3)));
 	}
-	else ASSERT(false);
+	else
+		ASSERT(false);
 
 	if(state.textureFormat.isSRGBformat())
 	{
@@ -1660,7 +1740,7 @@
 	return c;
 }
 
-Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function)
+Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function)
 {
 	Vector4s c;
 
@@ -1670,7 +1750,7 @@
 	if(isYcbcrFormat())
 	{
 		// Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
-		Pointer<Byte> bufferY = buffer;  // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
+		Pointer<Byte> bufferY = buffer;                                                                         // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
 		Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));  // U/V for 2-plane interleaved formats.
 		Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
 
@@ -1715,7 +1795,8 @@
 				U = (UV & Short4(0x00FFu)) | (UV << 8);
 				V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
 			}
-			else UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
+			else
+				UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
 
 			if(!state.swappedChroma)
 			{
@@ -1733,19 +1814,19 @@
 		{
 			// YCbCr formats are treated as signed 15-bit.
 			c.x = Cr >> 1;
-			c.y = Y  >> 1;
+			c.y = Y >> 1;
 			c.z = Cb >> 1;
 		}
 		else
 		{
 			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
 			// Scale down by 0x0101 to normalize the 8.8 samples, and up by 0x7FFF for signed 15-bit output.
-			float yOffset  = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0);
+			float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0);
 			float uvOffset = static_cast<float>(128 * 0x0101);
-			float yFactor  = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101);
+			float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101);
 			float uvFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 224 * 0x0101 : 255 * 0x0101);
 
-			Float4 y = (Float4(Y)  - Float4(yOffset))  * Float4(yFactor);
+			Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor);
 			Float4 u = (Float4(Cb) - Float4(uvOffset)) * Float4(uvFactor);
 			Float4 v = (Float4(Cr) - Float4(uvOffset)) * Float4(uvFactor);
 
@@ -1767,20 +1848,20 @@
 
 				switch(state.ycbcrModel)
 				{
-				case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
-					Kb = 0.0722f;
-					Kr = 0.2126f;
-					break;
-				case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
-					Kb = 0.114f;
-					Kr = 0.299f;
-					break;
-				case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
-					Kb = 0.0593f;
-					Kr = 0.2627f;
-					break;
-				default:
-					UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
+					case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
+						Kb = 0.0722f;
+						Kr = 0.2126f;
+						break;
+					case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
+						Kb = 0.114f;
+						Kr = 0.299f;
+						break;
+					case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
+						Kb = 0.0593f;
+						Kr = 0.2627f;
+						break;
+					default:
+						UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
 				}
 
 				const float Kg = 1.0f - Kr - Kb;
@@ -1790,9 +1871,9 @@
 				const float Gr = -2 * Kr * (1 - Kr) / Kg;
 				const float Bb = 2 * (1 - Kb);
 
-				Float4 r = y                  + Float4(Rr) * v;
+				Float4 r = y + Float4(Rr) * v;
 				Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
-				Float4 b = y + Float4(Bb) * u                 ;
+				Float4 b = y + Float4(Bb) * u;
 
 				c.x = Short4(r);
 				c.y = Short4(g);
@@ -1808,7 +1889,7 @@
 	return c;
 }
 
-Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function)
+Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function)
 {
 	Int4 valid;
 
@@ -1832,111 +1913,111 @@
 	{
 		switch(state.textureFormat)
 		{
-		case VK_FORMAT_R16_SFLOAT:
-			t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
-			t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
-			t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
-			t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
+			case VK_FORMAT_R16_SFLOAT:
+				t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
+				t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
+				t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
+				t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
 
-			c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
-			c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
-			c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
-			c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
-			break;
-		case VK_FORMAT_R16G16_SFLOAT:
-			t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
-			t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
-			t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
-			t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
+				c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
+				c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
+				c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
+				c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
+				break;
+			case VK_FORMAT_R16G16_SFLOAT:
+				t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
+				t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
+				t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
+				t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
 
-			// FIXME: shuffles
-			c.x = As<Float4>(halfToFloatBits(t0));
-			c.y = As<Float4>(halfToFloatBits(t1));
-			c.z = As<Float4>(halfToFloatBits(t2));
-			c.w = As<Float4>(halfToFloatBits(t3));
-			transpose4x4(c.x, c.y, c.z, c.w);
-			break;
-		case VK_FORMAT_R16G16B16A16_SFLOAT:
-			t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
-			t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
-			t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
-			t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
+				// FIXME: shuffles
+				c.x = As<Float4>(halfToFloatBits(t0));
+				c.y = As<Float4>(halfToFloatBits(t1));
+				c.z = As<Float4>(halfToFloatBits(t2));
+				c.w = As<Float4>(halfToFloatBits(t3));
+				transpose4x4(c.x, c.y, c.z, c.w);
+				break;
+			case VK_FORMAT_R16G16B16A16_SFLOAT:
+				t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
+				t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
+				t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
+				t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
 
-			c.x = As<Float4>(halfToFloatBits(t0));
-			c.y = As<Float4>(halfToFloatBits(t1));
-			c.z = As<Float4>(halfToFloatBits(t2));
-			c.w = As<Float4>(halfToFloatBits(t3));
-			transpose4x4(c.x, c.y, c.z, c.w);
-			break;
-		case VK_FORMAT_R32_SFLOAT:
-		case VK_FORMAT_R32_SINT:
-		case VK_FORMAT_R32_UINT:
-		case VK_FORMAT_D32_SFLOAT:
-			// FIXME: Optimal shuffling?
-			c.x.x = *Pointer<Float>(buffer + index[0] * 4);
-			c.x.y = *Pointer<Float>(buffer + index[1] * 4);
-			c.x.z = *Pointer<Float>(buffer + index[2] * 4);
-			c.x.w = *Pointer<Float>(buffer + index[3] * 4);
-			break;
-		case VK_FORMAT_R32G32_SFLOAT:
-		case VK_FORMAT_R32G32_SINT:
-		case VK_FORMAT_R32G32_UINT:
-			// FIXME: Optimal shuffling?
-			c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
-			c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
-			c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
-			c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
-			c.y = c.x;
-			c.x = Float4(c.x.xz, c.z.xz);
-			c.y = Float4(c.y.yw, c.z.yw);
-			break;
-		case VK_FORMAT_R32G32B32_SFLOAT:
-		case VK_FORMAT_R32G32B32_SINT:
-		case VK_FORMAT_R32G32B32_UINT:
-			c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
-			c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
-			c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
-			c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
-			transpose4x3(c.x, c.y, c.z, c.w);
-			break;
-		case VK_FORMAT_R32G32B32A32_SFLOAT:
-		case VK_FORMAT_R32G32B32A32_SINT:
-		case VK_FORMAT_R32G32B32A32_UINT:
-			c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
-			c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
-			c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
-			c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
-			transpose4x4(c.x, c.y, c.z, c.w);
-			break;
-		case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
-		{
-			Float4 t;		// TODO: add Insert(UInt4, RValue<UInt>)
-			t.x = *Pointer<Float>(buffer + index[0] * 4);
-			t.y = *Pointer<Float>(buffer + index[1] * 4);
-			t.z = *Pointer<Float>(buffer + index[2] * 4);
-			t.w = *Pointer<Float>(buffer + index[3] * 4);
-			t0 = As<UInt4>(t);
-			c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
-			c.x = Float4((t0) & UInt4(0x1FF)) * c.w;
-			c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w;
-			c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w;
-			break;
-		}
-		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
-		{
-			Float4 t;		// TODO: add Insert(UInt4, RValue<UInt>)
-			t.x = *Pointer<Float>(buffer + index[0] * 4);
-			t.y = *Pointer<Float>(buffer + index[1] * 4);
-			t.z = *Pointer<Float>(buffer + index[2] * 4);
-			t.w = *Pointer<Float>(buffer + index[3] * 4);
-			t0 = As<UInt4>(t);
-			c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
-			c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
-			c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0)));
-			break;
-		}
-		default:
-			UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
+				c.x = As<Float4>(halfToFloatBits(t0));
+				c.y = As<Float4>(halfToFloatBits(t1));
+				c.z = As<Float4>(halfToFloatBits(t2));
+				c.w = As<Float4>(halfToFloatBits(t3));
+				transpose4x4(c.x, c.y, c.z, c.w);
+				break;
+			case VK_FORMAT_R32_SFLOAT:
+			case VK_FORMAT_R32_SINT:
+			case VK_FORMAT_R32_UINT:
+			case VK_FORMAT_D32_SFLOAT:
+				// FIXME: Optimal shuffling?
+				c.x.x = *Pointer<Float>(buffer + index[0] * 4);
+				c.x.y = *Pointer<Float>(buffer + index[1] * 4);
+				c.x.z = *Pointer<Float>(buffer + index[2] * 4);
+				c.x.w = *Pointer<Float>(buffer + index[3] * 4);
+				break;
+			case VK_FORMAT_R32G32_SFLOAT:
+			case VK_FORMAT_R32G32_SINT:
+			case VK_FORMAT_R32G32_UINT:
+				// FIXME: Optimal shuffling?
+				c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
+				c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
+				c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
+				c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
+				c.y = c.x;
+				c.x = Float4(c.x.xz, c.z.xz);
+				c.y = Float4(c.y.yw, c.z.yw);
+				break;
+			case VK_FORMAT_R32G32B32_SFLOAT:
+			case VK_FORMAT_R32G32B32_SINT:
+			case VK_FORMAT_R32G32B32_UINT:
+				c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
+				transpose4x3(c.x, c.y, c.z, c.w);
+				break;
+			case VK_FORMAT_R32G32B32A32_SFLOAT:
+			case VK_FORMAT_R32G32B32A32_SINT:
+			case VK_FORMAT_R32G32B32A32_UINT:
+				c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
+				transpose4x4(c.x, c.y, c.z, c.w);
+				break;
+			case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+			{
+				Float4 t;  // TODO: add Insert(UInt4, RValue<UInt>)
+				t.x = *Pointer<Float>(buffer + index[0] * 4);
+				t.y = *Pointer<Float>(buffer + index[1] * 4);
+				t.z = *Pointer<Float>(buffer + index[2] * 4);
+				t.w = *Pointer<Float>(buffer + index[3] * 4);
+				t0 = As<UInt4>(t);
+				c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
+				c.x = Float4((t0)&UInt4(0x1FF)) * c.w;
+				c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w;
+				c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w;
+				break;
+			}
+			case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+			{
+				Float4 t;  // TODO: add Insert(UInt4, RValue<UInt>)
+				t.x = *Pointer<Float>(buffer + index[0] * 4);
+				t.y = *Pointer<Float>(buffer + index[1] * 4);
+				t.z = *Pointer<Float>(buffer + index[2] * 4);
+				t.w = *Pointer<Float>(buffer + index[3] * 4);
+				t0 = As<UInt4>(t);
+				c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
+				c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
+				c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0)));
+				break;
+			}
+			default:
+				UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
 		}
 	}
 	else
@@ -1989,15 +2070,15 @@
 
 		switch(state.compareOp)
 		{
-		case VK_COMPARE_OP_LESS_OR_EQUAL:    boolean = CmpLE(ref, c.x);  break;
-		case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break;
-		case VK_COMPARE_OP_LESS:             boolean = CmpLT(ref, c.x);  break;
-		case VK_COMPARE_OP_GREATER:          boolean = CmpNLE(ref, c.x); break;
-		case VK_COMPARE_OP_EQUAL:            boolean = CmpEQ(ref, c.x);  break;
-		case VK_COMPARE_OP_NOT_EQUAL:        boolean = CmpNEQ(ref, c.x); break;
-		case VK_COMPARE_OP_ALWAYS:           boolean = Int4(-1);         break;
-		case VK_COMPARE_OP_NEVER:            boolean = Int4(0);          break;
-		default:                   ASSERT(false);
+			case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break;
+			case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break;
+			case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break;
+			case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break;
+			case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break;
+			case VK_COMPARE_OP_NOT_EQUAL: boolean = CmpNEQ(ref, c.x); break;
+			case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break;
+			case VK_COMPARE_OP_NEVER: boolean = Int4(0); break;
+			default: ASSERT(false);
 		}
 
 		c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
@@ -2025,29 +2106,29 @@
 
 	switch(state.border)
 	{
-	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
-	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
-		borderRGB = Int4(0);
-		borderA = Int4(0);
-		break;
-	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
-		borderRGB = Int4(0);
-		borderA = float_one;
-		break;
-	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
-		borderRGB = Int4(0);
-		borderA = Int4(1);
-		break;
-	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
-		borderRGB = float_one;
-		borderA = float_one;
-		break;
-	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
-		borderRGB = Int4(1);
-		borderA = Int4(1);
-		break;
-	default:
-		UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border);
+		case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+		case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+			borderRGB = Int4(0);
+			borderA = Int4(0);
+			break;
+		case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+			borderRGB = Int4(0);
+			borderA = float_one;
+			break;
+		case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+			borderRGB = Int4(0);
+			borderA = Int4(1);
+			break;
+		case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+			borderRGB = float_one;
+			borderA = float_one;
+			break;
+		case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+			borderRGB = Int4(1);
+			borderA = Int4(1);
+			break;
+		default:
+			UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border);
 	}
 
 	Vector4f out;
@@ -2076,7 +2157,7 @@
 			// TODO: Preferred formula is ceil(lod + 0.5) - 1
 			ilod = RoundInt(lod);
 		}
-		else   // MIPMAP_LINEAR
+		else  // MIPMAP_LINEAR
 		{
 			ilod = Int(lod);
 		}
@@ -2148,7 +2229,7 @@
 
 		return As<Short4>(Int2(convert)) + Short4(0x8000u);
 	}
-	else   // Wrap
+	else  // Wrap
 	{
 		return Short4(Int4(uw * Float4(1 << 16)));
 	}
@@ -2182,7 +2263,7 @@
 	{
 		xyz0 = Min(Max(((function.offset != 0) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
 	}
-	else if(addressingMode == ADDRESSING_LAYER)   // Note: Offset does not apply to array layers
+	else if(addressingMode == ADDRESSING_LAYER)  // Note: Offset does not apply to array layers
 	{
 		// For cube maps, the layer argument is per cube, each of which has 6 layers
 		if(state.textureType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
@@ -2198,9 +2279,9 @@
 	}
 	else
 	{
-		const int halfBits = 0x3EFFFFFF;   // Value just under 0.5f
-		const int oneBits  = 0x3F7FFFFF;   // Value just under 1.0f
-		const int twoBits  = 0x3FFFFFFF;   // Value just under 2.0f
+		const int halfBits = 0x3EFFFFFF;  // Value just under 0.5f
+		const int oneBits = 0x3F7FFFFF;   // Value just under 1.0f
+		const int twoBits = 0x3FFFFFFF;   // Value just under 2.0f
 
 		bool pointFilter = state.textureFilter == FILTER_POINT ||
 		                   state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
@@ -2212,17 +2293,17 @@
 		{
 			switch(addressingMode)
 			{
-			case ADDRESSING_CLAMP:
-				coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits)));
-				break;
-			case ADDRESSING_BORDER:
-				// Don't map to a valid range here.
-				break;
-			default:
-				// If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be
-				// either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER
-				UNREACHABLE("addressingMode %d", int(addressingMode));
-				break;
+				case ADDRESSING_CLAMP:
+					coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits)));
+					break;
+				case ADDRESSING_BORDER:
+					// Don't map to a valid range here.
+					break;
+				default:
+					// If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be
+					// either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER
+					UNREACHABLE("addressingMode %d", int(addressingMode));
+					break;
 			}
 		}
 		else if(state.textureFilter == FILTER_GATHER && addressingMode == ADDRESSING_MIRROR)
@@ -2244,8 +2325,8 @@
 
 			xyz1 = xyz0 + Int4(1);
 
-			xyz0 = (maxXYZ) - mirror(mod(xyz0, Int4(2) * dim) - dim);
-			xyz1 = (maxXYZ) - mirror(mod(xyz1, Int4(2) * dim) - dim);
+			xyz0 = (maxXYZ)-mirror(mod(xyz0, Int4(2) * dim) - dim);
+			xyz1 = (maxXYZ)-mirror(mod(xyz1, Int4(2) * dim) - dim);
 
 			return;
 		}
@@ -2255,17 +2336,17 @@
 			{
 				switch(addressingMode)
 				{
-				case ADDRESSING_CLAMP:
-				case ADDRESSING_SEAMLESS:
-					// Linear filtering of cube doesn't require clamping because the coordinates
-					// are already in [0, 1] range and numerical imprecision is tolerated.
-					if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
-					{
-						Float4 one = As<Float4>(Int4(oneBits));
-						coord = Min(Max(coord, Float4(0.0f)), one);
-					}
-					break;
-				case ADDRESSING_MIRROR:
+					case ADDRESSING_CLAMP:
+					case ADDRESSING_SEAMLESS:
+						// Linear filtering of cube doesn't require clamping because the coordinates
+						// are already in [0, 1] range and numerical imprecision is tolerated.
+						if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
+						{
+							Float4 one = As<Float4>(Int4(oneBits));
+							coord = Min(Max(coord, Float4(0.0f)), one);
+						}
+						break;
+					case ADDRESSING_MIRROR:
 					{
 						Float4 half = As<Float4>(Int4(halfBits));
 						Float4 one = As<Float4>(Int4(oneBits));
@@ -2273,7 +2354,7 @@
 						coord = one - Abs(two * Frac(coord * half) - one);
 					}
 					break;
-				case ADDRESSING_MIRRORONCE:
+					case ADDRESSING_MIRRORONCE:
 					{
 						Float4 half = As<Float4>(Int4(halfBits));
 						Float4 one = As<Float4>(Int4(oneBits));
@@ -2281,12 +2362,12 @@
 						coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
 					}
 					break;
-				case ADDRESSING_BORDER:
-					// Don't map to a valid range here.
-					break;
-				default:   // Wrap
-					coord = Frac(coord);
-					break;
+					case ADDRESSING_BORDER:
+						// Don't map to a valid range here.
+						break;
+					default:  // Wrap
+						coord = Frac(coord);
+						break;
 				}
 			}
 
@@ -2331,7 +2412,7 @@
 			xyz0 += Int4(1);
 		}
 
-		xyz1 = xyz0 - filter;   // Increment
+		xyz1 = xyz0 - filter;  // Increment
 
 		if(addressingMode == ADDRESSING_BORDER)
 		{
@@ -2345,41 +2426,41 @@
 		{
 			switch(addressingMode)
 			{
-			case ADDRESSING_SEAMLESS:
-				UNREACHABLE("addressingMode %d", int(addressingMode));  // Cube sampling doesn't support offset.
-			case ADDRESSING_MIRROR:
-			case ADDRESSING_MIRRORONCE:
-				// TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE.
-				// Fall through to Clamp.
-			case ADDRESSING_CLAMP:
-				xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
-				xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
-				break;
-			default:   // Wrap
-				xyz0 = mod(xyz0, dim);
-				xyz1 = mod(xyz1, dim);
-				break;
+				case ADDRESSING_SEAMLESS:
+					UNREACHABLE("addressingMode %d", int(addressingMode));  // Cube sampling doesn't support offset.
+				case ADDRESSING_MIRROR:
+				case ADDRESSING_MIRRORONCE:
+					// TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE.
+					// Fall through to Clamp.
+				case ADDRESSING_CLAMP:
+					xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
+					xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
+					break;
+				default:  // Wrap
+					xyz0 = mod(xyz0, dim);
+					xyz1 = mod(xyz1, dim);
+					break;
 			}
 		}
 		else if(state.textureFilter != FILTER_POINT)
 		{
 			switch(addressingMode)
 			{
-			case ADDRESSING_SEAMLESS:
-				break;
-			case ADDRESSING_MIRROR:
-			case ADDRESSING_MIRRORONCE:
-			case ADDRESSING_CLAMP:
-				xyz0 = Max(xyz0, Int4(0));
-				xyz1 = Min(xyz1, maxXYZ);
-				break;
-			default:   // Wrap
+				case ADDRESSING_SEAMLESS:
+					break;
+				case ADDRESSING_MIRROR:
+				case ADDRESSING_MIRRORONCE:
+				case ADDRESSING_CLAMP:
+					xyz0 = Max(xyz0, Int4(0));
+					xyz1 = Min(xyz1, maxXYZ);
+					break;
+				default:  // Wrap
 				{
 					Int4 under = CmpLT(xyz0, Int4(0));
-					xyz0 = (under & maxXYZ) | (~under & xyz0);   // xyz < 0 ? dim - 1 : xyz   // TODO: IfThenElse()
+					xyz0 = (under & maxXYZ) | (~under & xyz0);  // xyz < 0 ? dim - 1 : xyz   // TODO: IfThenElse()
 
 					Int4 nover = CmpLT(xyz1, dim);
-					xyz1 = nover & xyz1;   // xyz >= dim ? 0 : xyz
+					xyz1 = nover & xyz1;  // xyz >= dim ? 0 : xyz
 				}
 				break;
 			}
@@ -2401,7 +2482,7 @@
 {
 	c = As<UShort4>(c) >> 8;
 
-	Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16));
+	Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants, sRGBtoLinear8_16));
 
 	c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
 	c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
@@ -2483,13 +2564,13 @@
 {
 	switch(state.gatherComponent)
 	{
-	case 0: return state.swizzle.r;
-	case 1: return state.swizzle.g;
-	case 2: return state.swizzle.b;
-	case 3: return state.swizzle.a;
-	default:
-		UNREACHABLE("Invalid component");
-		return VK_COMPONENT_SWIZZLE_R;
+		case 0: return state.swizzle.r;
+		case 1: return state.swizzle.g;
+		case 2: return state.swizzle.b;
+		case 3: return state.swizzle.a;
+		default:
+			UNREACHABLE("Invalid component");
+			return VK_COMPONENT_SWIZZLE_R;
 	}
 }
 
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index 396a9f6..b1e925f 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -21,7 +21,7 @@
 #include "Reactor/Reactor.hpp"
 
 #ifdef None
-#undef None  // b/127920555
+#	undef None  // b/127920555
 #endif
 
 namespace sw {
@@ -45,7 +45,9 @@
 struct SamplerFunction
 {
 	SamplerFunction(SamplerMethod method, bool offset = false, bool sample = false)
-		: method(method), offset(offset), sample(sample)
+	    : method(method)
+	    , offset(offset)
+	    , sample(sample)
 	{}
 
 	operator SamplerMethod() { return method; }
@@ -60,20 +62,20 @@
 public:
 	SamplerCore(Pointer<Byte> &constants, const Sampler &state);
 
-	Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4& sampleId, SamplerFunction function);
+	Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4 &sampleId, SamplerFunction function);
 
 private:
 	Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
-	Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
-	Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
-	Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
-	Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
-	Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
-	Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
-	Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
-	Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
-	Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
-	Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+	Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
+	Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
+	Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+	Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+	Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+	Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
+	Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
+	Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+	Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+	Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
 	Float log2sqrt(Float lod);
 	Float log2(Float lod);
 	void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float4 &dsx, Float4 &dsy, SamplerFunction function);
@@ -81,15 +83,15 @@
 	void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, SamplerFunction function);
 	Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
 	Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
-	void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, SamplerFunction function);
-	void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, SamplerFunction function);
-	Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function);
+	void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function);
+	void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function);
+	Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function);
 	Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer);
-	Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function);
+	Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function);
 	Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid);
 	void selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD);
-	Short4 address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);
-	void address(const Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
+	Short4 address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap);
+	void address(const Float4 &uw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
 	Int4 computeFilterOffset(Float &lod);
 
 	void convertSigned15(Float4 &cf, Short4 &ci);
@@ -120,21 +122,22 @@
 #ifdef ENABLE_RR_PRINT
 namespace rr {
 
-template <> struct PrintValue::Ty<sw::SamplerFunction>
+template<>
+struct PrintValue::Ty<sw::SamplerFunction>
 {
-	static std::string fmt(const sw::SamplerFunction& v)
+	static std::string fmt(const sw::SamplerFunction &v)
 	{
 		return std::string("SamplerFunction[") +
-			"method: " + std::to_string(v.method) +
-			", offset: " + std::to_string(v.offset) +
-			", sample: " + std::to_string(v.sample) +
-			"]";
+		       "method: " + std::to_string(v.method) +
+		       ", offset: " + std::to_string(v.offset) +
+		       ", sample: " + std::to_string(v.sample) +
+		       "]";
 	}
 
-	static std::vector<rr::Value*> val(const sw::SamplerFunction& v) { return {}; }
+	static std::vector<rr::Value *> val(const sw::SamplerFunction &v) { return {}; }
 };
 
 }  // namespace rr
-#endif // ENABLE_RR_PRINT
+#endif  // ENABLE_RR_PRINT
 
-#endif   // sw_SamplerCore_hpp
+#endif  // sw_SamplerCore_hpp
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index d3307df..06dea29 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -12,18 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <Device/Vertex.hpp>
 #include "SetupRoutine.hpp"
+#include <Device/Vertex.hpp>
 
 #include "Constants.hpp"
-#include "Device/Primitive.hpp"
 #include "Device/Polygon.hpp"
+#include "Device/Primitive.hpp"
 #include "Device/Renderer.hpp"
 #include "Reactor/Reactor.hpp"
 
 namespace sw {
 
-SetupRoutine::SetupRoutine(const SetupProcessor::State &state) : state(state)
+SetupRoutine::SetupRoutine(const SetupProcessor::State &state)
+    : state(state)
 {
 }
 
@@ -40,15 +41,15 @@
 		Pointer<Byte> polygon(function.Arg<2>());
 		Pointer<Byte> data(function.Arg<3>());
 
-		Pointer<Byte> constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
+		Pointer<Byte> constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData, constants));
 
 		const bool point = state.isDrawPoint;
 		const bool line = state.isDrawLine;
 		const bool triangle = state.isDrawTriangle;
 
-		const int V0 = OFFSET(Triangle,v0);
-		const int V1 = (triangle || line) ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0);
-		const int V2 = triangle ? OFFSET(Triangle,v2) : (line ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0));
+		const int V0 = OFFSET(Triangle, v0);
+		const int V1 = (triangle || line) ? OFFSET(Triangle, v1) : OFFSET(Triangle, v0);
+		const int V2 = triangle ? OFFSET(Triangle, v2) : (line ? OFFSET(Triangle, v1) : OFFSET(Triangle, v0));
 
 		Pointer<Byte> v0 = tri + V0;
 		Pointer<Byte> v1 = tri + V1;
@@ -57,15 +58,15 @@
 		Array<Int> X(16);
 		Array<Int> Y(16);
 
-		X[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x));
-		X[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x));
-		X[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x));
+		X[0] = *Pointer<Int>(v0 + OFFSET(Vertex, projected.x));
+		X[1] = *Pointer<Int>(v1 + OFFSET(Vertex, projected.x));
+		X[2] = *Pointer<Int>(v2 + OFFSET(Vertex, projected.x));
 
-		Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y));
-		Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y));
-		Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y));
+		Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex, projected.y));
+		Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex, projected.y));
+		Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex, projected.y));
 
-		Int d = 1;     // Winding direction
+		Int d = 1;  // Winding direction
 
 		// Culling
 		if(triangle)
@@ -78,7 +79,7 @@
 			Float y1 = Float(Y[1]);
 			Float y2 = Float(Y[2]);
 
-			Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2;   // Area
+			Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2;  // Area
 
 			If(A == 0.0f)
 			{
@@ -106,40 +107,40 @@
 
 			If(frontFacing)
 			{
-				*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-				*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+				*Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+				*Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 			}
 			Else
 			{
-				*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-				*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+				*Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+				*Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 			}
 		}
 		else
 		{
-			*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-			*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+			*Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+			*Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 		}
 
-		Int n = *Pointer<Int>(polygon + OFFSET(Polygon,n));
-		Int m = *Pointer<Int>(polygon + OFFSET(Polygon,i));
+		Int n = *Pointer<Int>(polygon + OFFSET(Polygon, n));
+		Int m = *Pointer<Int>(polygon + OFFSET(Polygon, i));
 
-		If(m != 0 || Bool(!triangle))   // Clipped triangle; reproject
+		If(m != 0 || Bool(!triangle))  // Clipped triangle; reproject
 		{
-			Pointer<Byte> V = polygon + OFFSET(Polygon,P) + m * sizeof(void*) * 16;
+			Pointer<Byte> V = polygon + OFFSET(Polygon, P) + m * sizeof(void *) * 16;
 
 			Int i = 0;
 
 			Do
 			{
-				Pointer<Float4> p = *Pointer<Pointer<Float4> >(V + i * sizeof(void*));
+				Pointer<Float4> p = *Pointer<Pointer<Float4> >(V + i * sizeof(void *));
 				Float4 v = *Pointer<Float4>(p, 16);
 
 				Float w = v.w;
 				Float rhw = IfThenElse(w != 0.0f, 1.0f / w, Float(1.0f));
 
-				X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,WxF)));
-				Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,HxF)));
+				X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData, X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData, WxF)));
+				Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData, Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData, HxF)));
 
 				i++;
 			}
@@ -176,8 +177,8 @@
 			yMax = (yMax + subPixM) >> subPixB;
 		}
 
-		yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
-		yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
+		yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData, scissorY0)));
+		yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData, scissorY1)));
 
 		// If yMin and yMax are initially negative, the scissor clamping above will typically result
 		// in yMin == 0 and yMax unchanged. We bail as we don't need to rasterize this primitive, and
@@ -201,16 +202,16 @@
 
 				if(state.multiSample > 1)
 				{
-					Xq[i] = Xq[i] + *Pointer<Int>(constants + OFFSET(Constants,Xf) + q * sizeof(int));
-					Yq[i] = Yq[i] + *Pointer<Int>(constants + OFFSET(Constants,Yf) + q * sizeof(int));
+					Xq[i] = Xq[i] + *Pointer<Int>(constants + OFFSET(Constants, Xf) + q * sizeof(int));
+					Yq[i] = Yq[i] + *Pointer<Int>(constants + OFFSET(Constants, Yf) + q * sizeof(int));
 				}
 
 				i++;
 			}
 			Until(i >= n);
 
-			Pointer<Byte> leftEdge = Pointer<Byte>(primitive + OFFSET(Primitive,outline->left)) + q * sizeof(Primitive);
-			Pointer<Byte> rightEdge = Pointer<Byte>(primitive + OFFSET(Primitive,outline->right)) + q * sizeof(Primitive);
+			Pointer<Byte> leftEdge = Pointer<Byte>(primitive + OFFSET(Primitive, outline->left)) + q * sizeof(Primitive);
+			Pointer<Byte> rightEdge = Pointer<Byte>(primitive + OFFSET(Primitive, outline->right)) + q * sizeof(Primitive);
 
 			if(state.multiSample > 1)
 			{
@@ -265,8 +266,8 @@
 			}
 		}
 
-		*Pointer<Int>(primitive + OFFSET(Primitive,yMin)) = yMin;
-		*Pointer<Int>(primitive + OFFSET(Primitive,yMax)) = yMax;
+		*Pointer<Int>(primitive + OFFSET(Primitive, yMin)) = yMin;
+		*Pointer<Int>(primitive + OFFSET(Primitive, yMax)) = yMax;
 
 		// Sort by minimum y
 		if(triangle)
@@ -305,15 +306,15 @@
 		w012.z = w2;
 		w012.w = 1;
 
-		Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.w));
+		Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex, projected.w));
 
-		Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x));
-		Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x));
-		Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x));
+		Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex, projected.x));
+		Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex, projected.x));
+		Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex, projected.x));
 
-		Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y));
-		Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y));
-		Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y));
+		Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex, projected.y));
+		Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex, projected.y));
+		Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex, projected.y));
 
 		if(point)
 		{
@@ -347,8 +348,8 @@
 		Float4 xQuad = Float4(0, 1, 0, 1) - Float4(dx);
 		Float4 yQuad = Float4(0, 0, 1, 1) - Float4(dy);
 
-		*Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16) = xQuad;
-		*Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16) = yQuad;
+		*Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16) = xQuad;
+		*Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16) = yQuad;
 
 		Float4 M[3];
 
@@ -365,18 +366,18 @@
 
 			M[0].x = (y1 * w2 - y2 * w1) * D;
 			M[0].y = (x2 * w1 - x1 * w2) * D;
-		//	M[0].z = rhw0;
-		//	M[0].w = 0;
+			//	M[0].z = rhw0;
+			//	M[0].w = 0;
 
 			M[1].x = y2 * A;
 			M[1].y = -x2 * A;
-		//	M[1].z = 0;
-		//	M[1].w = 0;
+			//	M[1].z = 0;
+			//	M[1].w = 0;
 
 			M[2].x = -y1 * A;
 			M[2].y = x1 * A;
-		//	M[2].z = 0;
-		//	M[2].w = 0;
+			//	M[2].z = 0;
+			//	M[2].w = 0;
 		}
 
 		if(state.interpolateW)
@@ -387,16 +388,16 @@
 			Float4 B = ABC.y;
 			Float4 C = ABC.z;
 
-			*Pointer<Float4>(primitive + OFFSET(Primitive,w.A), 16) = A;
-			*Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16) = B;
-			*Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) = C;
+			*Pointer<Float4>(primitive + OFFSET(Primitive, w.A), 16) = A;
+			*Pointer<Float4>(primitive + OFFSET(Primitive, w.B), 16) = B;
+			*Pointer<Float4>(primitive + OFFSET(Primitive, w.C), 16) = C;
 		}
 
 		if(state.interpolateZ)
 		{
-			Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.z));
-			Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex,projected.z));
-			Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex,projected.z));
+			Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex, projected.z));
+			Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex, projected.z));
+			Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex, projected.z));
 
 			z1 -= z0;
 			z2 -= z0;
@@ -412,7 +413,7 @@
 				Float x2 = Float(X2) * (1.0f / subPixF);
 				Float y2 = Float(Y2) * (1.0f / subPixF);
 
-				Float D = *Pointer<Float>(data + OFFSET(DrawData,depthRange)) / (x1 * y2 - x2 * y1);
+				Float D = *Pointer<Float>(data + OFFSET(DrawData, depthRange)) / (x1 * y2 - x2 * y1);
 
 				Float a = (y2 * z1 - y1 * z2) * D;
 				Float b = (x1 * z2 - x2 * z1) * D;
@@ -426,22 +427,22 @@
 				B = Float4(0, 0, 0, 0);
 			}
 
-			*Pointer<Float4>(primitive + OFFSET(Primitive,z.A), 16) = A;
-			*Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16) = B;
+			*Pointer<Float4>(primitive + OFFSET(Primitive, z.A), 16) = A;
+			*Pointer<Float4>(primitive + OFFSET(Primitive, z.B), 16) = B;
 
 			Float c = z0;
 
 			if(state.applySlopeDepthBias)
 			{
 				Float bias = Max(Abs(Float(A.x)), Abs(Float(B.x)));
-				bias *= *Pointer<Float>(data + OFFSET(DrawData,slopeDepthBias));
+				bias *= *Pointer<Float>(data + OFFSET(DrawData, slopeDepthBias));
 
 				c += bias;
 			}
 
-			C = Float4(c * *Pointer<Float>(data + OFFSET(DrawData,depthRange)) + *Pointer<Float>(data + OFFSET(DrawData,depthNear)));
+			C = Float4(c * *Pointer<Float>(data + OFFSET(DrawData, depthRange)) + *Pointer<Float>(data + OFFSET(DrawData, depthNear)));
 
-			*Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) = C;
+			*Pointer<Float4>(primitive + OFFSET(Primitive, z.C), 16) = C;
 		}
 
 		for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
@@ -449,27 +450,27 @@
 			if(state.gradient[interpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED)
 			{
 				setupGradient(primitive, tri, w012, M, v0, v1, v2,
-						OFFSET(Vertex, v[interpolant]),
-						OFFSET(Primitive, V[interpolant]),
-						state.gradient[interpolant].Flat,
-						!state.gradient[interpolant].NoPerspective);
+				              OFFSET(Vertex, v[interpolant]),
+				              OFFSET(Primitive, V[interpolant]),
+				              state.gradient[interpolant].Flat,
+				              !state.gradient[interpolant].NoPerspective);
 			}
 		}
 
 		for(unsigned int i = 0; i < state.numClipDistances; i++)
 		{
 			setupGradient(primitive, tri, w012, M, v0, v1, v2,
-					OFFSET(Vertex, clipDistance[i]),
-					OFFSET(Primitive, clipDistance[i]),
-					false, true);
+			              OFFSET(Vertex, clipDistance[i]),
+			              OFFSET(Primitive, clipDistance[i]),
+			              false, true);
 		}
 
 		for(unsigned int i = 0; i < state.numCullDistances; i++)
 		{
 			setupGradient(primitive, tri, w012, M, v0, v1, v2,
-					OFFSET(Vertex, cullDistance[i]),
-					OFFSET(Primitive, cullDistance[i]),
-					false, true);
+			              OFFSET(Vertex, cullDistance[i]),
+			              OFFSET(Primitive, cullDistance[i]),
+			              false, true);
 		}
 
 		Return(1);
@@ -509,7 +510,7 @@
 	}
 	else
 	{
-		int leadingVertex = OFFSET(Triangle,v0);
+		int leadingVertex = OFFSET(Triangle, v0);
 		Float C = *Pointer<Float>(triangle + leadingVertex + attribute);
 
 		*Pointer<Float4>(primitive + planeEquation + 0, 16) = Float4(0, 0, 0, 0);
@@ -532,16 +533,16 @@
 		constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS;
 		constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK;
 
-		Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
-		Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
+		Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData, scissorY0)));
+		Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData, scissorY1)));
 
 		If(y1 < y2)
 		{
-			Int xMin = *Pointer<Int>(data + OFFSET(DrawData,scissorX0));
-			Int xMax = *Pointer<Int>(data + OFFSET(DrawData,scissorX1));
+			Int xMin = *Pointer<Int>(data + OFFSET(DrawData, scissorX0));
+			Int xMax = *Pointer<Int>(data + OFFSET(DrawData, scissorX1));
 
-			Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left);
-			Pointer<Byte> rightEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right);
+			Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->left);
+			Pointer<Byte> rightEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->right);
 			Pointer<Byte> edge = IfThenElse(swap, rightEdge, leftEdge);
 
 			// Deltas
@@ -552,19 +553,19 @@
 			Int FDY12 = DY12 << subPixB;
 
 			Int X = DX12 * ((y1 << subPixB) - Y1) + (X1 & subPixM) * DY12;
-			Int x = (X1 >> subPixB) + X / FDY12;   // Edge
-			Int d = X % FDY12;               // Error-term
-			Int ceil = -d >> 31;             // Ceiling division: remainder <= 0
+			Int x = (X1 >> subPixB) + X / FDY12;  // Edge
+			Int d = X % FDY12;                    // Error-term
+			Int ceil = -d >> 31;                  // Ceiling division: remainder <= 0
 			x -= ceil;
 			d -= ceil & FDY12;
 
-			Int Q = FDX12 / FDY12;   // Edge-step
-			Int R = FDX12 % FDY12;   // Error-step
-			Int floor = R >> 31;     // Flooring division: remainder >= 0
+			Int Q = FDX12 / FDY12;  // Edge-step
+			Int R = FDX12 % FDY12;  // Error-step
+			Int floor = R >> 31;    // Flooring division: remainder >= 0
 			Q += floor;
 			R += floor & FDY12;
 
-			Int D = FDY12;   // Error-overflow
+			Int D = FDY12;  // Error-overflow
 			Int y = y1;
 
 			Do
@@ -588,7 +589,7 @@
 
 void SetupRoutine::conditionalRotate1(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2)
 {
-	#if 0   // Rely on LLVM optimization
+#if 0  // Rely on LLVM optimization
 		If(condition)
 		{
 			Pointer<Byte> vX;
@@ -598,17 +599,17 @@
 			v1 = v2;
 			v2 = vX;
 		}
-	#else
-		Pointer<Byte> vX = v0;
-		v0 = IfThenElse(condition, v1, v0);
-		v1 = IfThenElse(condition, v2, v1);
-		v2 = IfThenElse(condition, vX, v2);
-	#endif
+#else
+	Pointer<Byte> vX = v0;
+	v0 = IfThenElse(condition, v1, v0);
+	v1 = IfThenElse(condition, v2, v1);
+	v2 = IfThenElse(condition, vX, v2);
+#endif
 }
 
 void SetupRoutine::conditionalRotate2(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2)
 {
-	#if 0   // Rely on LLVM optimization
+#if 0  // Rely on LLVM optimization
 		If(condition)
 		{
 			Pointer<Byte> vX;
@@ -618,12 +619,12 @@
 			v1 = v0;
 			v0 = vX;
 		}
-	#else
-		Pointer<Byte> vX = v2;
-		v2 = IfThenElse(condition, v1, v2);
-		v1 = IfThenElse(condition, v0, v1);
-		v0 = IfThenElse(condition, vX, v0);
-	#endif
+#else
+	Pointer<Byte> vX = v2;
+	v2 = IfThenElse(condition, v1, v2);
+	v1 = IfThenElse(condition, v0, v1);
+	v0 = IfThenElse(condition, vX, v0);
+#endif
 }
 
 SetupFunction::RoutineType SetupRoutine::getRoutine()
diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp
index 59fe55a..b8b399b 100644
--- a/src/Pipeline/SetupRoutine.hpp
+++ b/src/Pipeline/SetupRoutine.hpp
@@ -45,4 +45,4 @@
 
 }  // namespace sw
 
-#endif   // sw_SetupRoutine_hpp
+#endif  // sw_SetupRoutine_hpp
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 65b2084..534ed8d 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -55,10 +55,10 @@
 {
 	switch(i)
 	{
-	case 0: return x;
-	case 1: return y;
-	case 2: return z;
-	case 3: return w;
+		case 0: return x;
+		case 1: return y;
+		case 2: return z;
+		case 3: return w;
 	}
 
 	return x;
@@ -98,10 +98,10 @@
 {
 	switch(i)
 	{
-	case 0: return x;
-	case 1: return y;
-	case 2: return z;
-	case 3: return w;
+		case 0: return x;
+		case 1: return y;
+		case 2: return z;
+		case 3: return w;
 	}
 
 	return x;
@@ -116,20 +116,20 @@
 	// the IEEE-754 floating-point number. Clamp to prevent overflow
 	// past the representation of infinity.
 	Float4 x0 = x;
-	x0 = Min(x0, As<Float4>(Int4(0x43010000)));   // 129.00000e+0f
-	x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF)));   // -126.99999e+0f
+	x0 = Min(x0, As<Float4>(Int4(0x43010000)));  // 129.00000e+0f
+	x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF)));  // -126.99999e+0f
 
 	Int4 i = RoundInt(x0 - Float4(0.5f));
-	Float4 ii = As<Float4>((i + Int4(127)) << 23);   // Add single-precision bias, and shift into exponent.
+	Float4 ii = As<Float4>((i + Int4(127)) << 23);  // Add single-precision bias, and shift into exponent.
 
 	// For the fractional part use a polynomial
 	// which approximates 2^f in the 0 to 1 range.
 	Float4 f = x0 - Float4(i);
-	Float4 ff = As<Float4>(Int4(0x3AF61905));     // 1.8775767e-3f
-	ff = ff * f + As<Float4>(Int4(0x3C134806));   // 8.9893397e-3f
-	ff = ff * f + As<Float4>(Int4(0x3D64AA23));   // 5.5826318e-2f
-	ff = ff * f + As<Float4>(Int4(0x3E75EAD4));   // 2.4015361e-1f
-	ff = ff * f + As<Float4>(Int4(0x3F31727B));   // 6.9315308e-1f
+	Float4 ff = As<Float4>(Int4(0x3AF61905));    // 1.8775767e-3f
+	ff = ff * f + As<Float4>(Int4(0x3C134806));  // 8.9893397e-3f
+	ff = ff * f + As<Float4>(Int4(0x3D64AA23));  // 5.5826318e-2f
+	ff = ff * f + As<Float4>(Int4(0x3E75EAD4));  // 2.4015361e-1f
+	ff = ff * f + As<Float4>(Int4(0x3F31727B));  // 6.9315308e-1f
 	ff = ff * f + Float4(1.0f);
 
 	return ii * ff;
@@ -147,7 +147,7 @@
 	x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
 	x1 = As<Float4>(As<UInt4>(x1) >> 8);
 	x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f)));
-	x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f);   // FIXME: (x1 - 1.4960938f) * 256.0f;
+	x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f);  // FIXME: (x1 - 1.4960938f) * 256.0f;
 	x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
 
 	x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
@@ -163,13 +163,13 @@
 Float4 exponential(RValue<Float4> x, bool pp)
 {
 	// FIXME: Propagate the constant
-	return exponential2(Float4(1.44269504f) * x, pp);   // 1/ln(2)
+	return exponential2(Float4(1.44269504f) * x, pp);  // 1/ln(2)
 }
 
 Float4 logarithm(RValue<Float4> x, bool pp)
 {
 	// FIXME: Propagate the constant
-	return Float4(6.93147181e-1f) * logarithm2(x, pp);   // ln(2)
+	return Float4(6.93147181e-1f) * logarithm2(x, pp);  // ln(2)
 }
 
 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp)
@@ -191,7 +191,7 @@
 	if(finite)
 	{
 		int big = 0x7F7FFFFF;
-		rcp = Min(rcp, Float4((float&)big));
+		rcp = Min(rcp, Float4((float &)big));
 	}
 
 	return rcp;
@@ -234,8 +234,8 @@
 
 Float4 sine_pi(RValue<Float4> x, bool pp)
 {
-	const Float4 A = Float4(-4.05284734e-1f);   // -4/pi^2
-	const Float4 B = Float4(1.27323954e+0f);    // 4/pi
+	const Float4 A = Float4(-4.05284734e-1f);  // -4/pi^2
+	const Float4 B = Float4(1.27323954e+0f);   // 4/pi
 	const Float4 C = Float4(7.75160950e-1f);
 	const Float4 D = Float4(2.24839049e-1f);
 
@@ -265,7 +265,7 @@
 Float4 sine(RValue<Float4> x, bool pp)
 {
 	// Reduce to [-0.5, 0.5] range
-	Float4 y = x * Float4(1.59154943e-1f);   // 1/2pi
+	Float4 y = x * Float4(1.59154943e-1f);  // 1/2pi
 	y = y - Round(y);
 
 	if(!pp)
@@ -320,10 +320,10 @@
 
 Float4 arcsin(RValue<Float4> x, bool pp)
 {
-	if(false) // Simpler implementation fails even lowp precision tests
+	if(false)  // Simpler implementation fails even lowp precision tests
 	{
 		// x*(pi/2-sqrt(1-x*x)*pi/5)
-		return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f));
+		return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x * x) * Float4(6.28318531e-1f));
 	}
 	else
 	{
@@ -335,7 +335,7 @@
 		const Float4 a3(-0.0187293f);
 		Float4 absx = Abs(x);
 		return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
-		       (As<Int4>(x) & Int4(0x80000000)));
+		                  (As<Int4>(x) & Int4(0x80000000)));
 	}
 }
 
@@ -366,20 +366,20 @@
 {
 	Float4 absx = Abs(x);
 	Int4 O = CmpNLT(absx, Float4(1.0f));
-	Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select
+	Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx)));  // FIXME: Vector select
 
 	const Float4 half_pi(1.57079632f);
 	Float4 theta = arctan_01(y, pp);
-	return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^ // FIXME: Vector select
-	       (As<Int4>(x) & Int4(0x80000000)));
+	return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^  // FIXME: Vector select
+	                  (As<Int4>(x) & Int4(0x80000000)));
 }
 
 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp)
 {
-	const Float4 pi(3.14159265f);            // pi
-	const Float4 minus_pi(-3.14159265f);     // -pi
-	const Float4 half_pi(1.57079632f);       // pi/2
-	const Float4 quarter_pi(7.85398163e-1f); // pi/4
+	const Float4 pi(3.14159265f);             // pi
+	const Float4 minus_pi(-3.14159265f);      // -pi
+	const Float4 half_pi(1.57079632f);        // pi/2
+	const Float4 quarter_pi(7.85398163e-1f);  // pi/4
 
 	// Rotate to upper semicircle when in lower semicircle
 	Int4 S = CmpLT(y, Float4(0.0f));
@@ -390,24 +390,24 @@
 	// Rotate to right quadrant when in left quadrant
 	Int4 Q = CmpLT(x0, Float4(0.0f));
 	theta += As<Float4>(Q & As<Int4>(half_pi));
-	Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0)));  // FIXME: Vector select
-	Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select
+	Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0)));   // FIXME: Vector select
+	Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0)));  // FIXME: Vector select
 
 	// Mirror to first octant when in second octant
 	Int4 O = CmpNLT(y1, x1);
-	Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1))); // FIXME: Vector select
-	Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select
+	Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1)));  // FIXME: Vector select
+	Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1)));  // FIXME: Vector select
 
 	// Approximation of atan in [0..1]
 	Int4 zero_x = CmpEQ(x2, Float4(0.0f));
-	Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
+	Int4 inf_y = IsInf(y2);  // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
 	Float4 atan2_theta = arctan_01(y2 / x2, pp);
-	theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select
+	theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) |  // FIXME: Vector select
 	                    (inf_y & As<Int4>(quarter_pi)));
 
 	// Recover loss of precision for tiny theta angles
-	Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
-	return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta))); // FIXME: Vector select
+	Int4 precision_loss = S & Q & O & ~inf_y;                                                            // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
+	return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta)));  // FIXME: Vector select
 }
 
 Float4 sineh(RValue<Float4> x, bool pp)
@@ -539,10 +539,10 @@
 {
 	switch(N)
 	{
-	case 1: transpose4x1(row0, row1, row2, row3); break;
-	case 2: transpose4x2(row0, row1, row2, row3); break;
-	case 3: transpose4x3(row0, row1, row2, row3); break;
-	case 4: transpose4x4(row0, row1, row2, row3); break;
+		case 1: transpose4x1(row0, row1, row2, row3); break;
+		case 2: transpose4x2(row0, row1, row2, row3); break;
+		case 3: transpose4x3(row0, row1, row2, row3); break;
+		case 4: transpose4x4(row0, row1, row2, row3); break;
 	}
 }
 
@@ -551,15 +551,15 @@
 	auto magic = SIMD::UInt(126 << 23);
 
 	auto sign16 = halfBits & SIMD::UInt(0x8000);
-	auto man16  = halfBits & SIMD::UInt(0x03FF);
-	auto exp16  = halfBits & SIMD::UInt(0x7C00);
+	auto man16 = halfBits & SIMD::UInt(0x03FF);
+	auto exp16 = halfBits & SIMD::UInt(0x7C00);
 
 	auto isDnormOrZero = CmpEQ(exp16, SIMD::UInt(0));
 	auto isInfOrNaN = CmpEQ(exp16, SIMD::UInt(0x7C00));
 
 	auto sign32 = sign16 << 16;
-	auto man32  = man16 << 13;
-	auto exp32  = (exp16 + SIMD::UInt(0x1C000)) << 13;
+	auto man32 = man16 << 13;
+	auto exp32 = (exp16 + SIMD::UInt(0x1C000)) << 13;
 	auto norm32 = (man32 | exp32) | (isInfOrNaN & SIMD::UInt(0x7F800000));
 
 	auto denorm32 = As<SIMD::UInt>(As<SIMD::Float>(magic + man16) - As<SIMD::Float>(magic));
@@ -584,7 +584,10 @@
 	// Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
 	//       instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
 	SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
-	                                           As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
+	                                           As<SIMD::Float>(SIMD::UInt(c_clamp))))) -
+	                       SIMD::UInt(mask_round)) >>
+	                      13) &
+	                     b_isnormal) |
 	                    ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) &
 	                     ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) | SIMD::UInt(c_infty_as_fp16)));
 
@@ -610,7 +613,7 @@
 {
 	SIMD::UInt halfBits = floatToHalfBits(As<SIMD::UInt>(value), true) &
 	                      SIMD::UInt(0x7FF00000, 0x7FF00000, 0x7FE00000, 0);
-	return (UInt(halfBits.x) >> 20)  | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1);
+	return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1);
 }
 
 rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
@@ -670,9 +673,9 @@
 
 // Performs a fused-multiply add, returning a * b + c.
 rr::RValue<sw::SIMD::Float> FMA(
-		rr::RValue<sw::SIMD::Float> const &a,
-		rr::RValue<sw::SIMD::Float> const &b,
-		rr::RValue<sw::SIMD::Float> const &c)
+    rr::RValue<sw::SIMD::Float> const &a,
+    rr::RValue<sw::SIMD::Float> const &b,
+    rr::RValue<sw::SIMD::Float> const &c)
 {
 	return a * b + c;
 }
@@ -694,12 +697,12 @@
 	auto xIsNan = IsNan(x);
 	auto yIsNan = IsNan(y);
 	return As<sw::SIMD::Float>(
-		// If neither are NaN, return min
-		((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
-		// If one operand is a NaN, the other operand is the result
-		// If both operands are NaN, the result is a NaN.
-		((~xIsNan &  yIsNan) & As<sw::SIMD::Int>(x)) |
-		(( xIsNan          ) & As<sw::SIMD::Int>(y)));
+	    // If neither are NaN, return min
+	    ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
+	    // If one operand is a NaN, the other operand is the result
+	    // If both operands are NaN, the result is a NaN.
+	    ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
+	    ((xIsNan)&As<sw::SIMD::Int>(y)));
 }
 
 // Returns y if y > x; otherwise result is x.
@@ -711,37 +714,37 @@
 	auto xIsNan = IsNan(x);
 	auto yIsNan = IsNan(y);
 	return As<sw::SIMD::Float>(
-		// If neither are NaN, return max
-		((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
-		// If one operand is a NaN, the other operand is the result
-		// If both operands are NaN, the result is a NaN.
-		((~xIsNan &  yIsNan) & As<sw::SIMD::Int>(x)) |
-		(( xIsNan          ) & As<sw::SIMD::Int>(y)));
+	    // If neither are NaN, return max
+	    ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
+	    // If one operand is a NaN, the other operand is the result
+	    // If both operands are NaN, the result is a NaN.
+	    ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
+	    ((xIsNan)&As<sw::SIMD::Int>(y)));
 }
 
 // Returns the determinant of a 2x2 matrix.
 rr::RValue<sw::SIMD::Float> Determinant(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
-	rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+    rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
 {
-	return a*d - b*c;
+	return a * d - b * c;
 }
 
 // Returns the determinant of a 3x3 matrix.
 rr::RValue<sw::SIMD::Float> Determinant(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
-	rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
-	rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+    rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+    rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
 {
-	return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h;
+	return a * e * i + b * f * g + c * d * h - c * e * g - b * d * i - a * f * h;
 }
 
 // Returns the determinant of a 4x4 matrix.
 rr::RValue<sw::SIMD::Float> Determinant(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
-	rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
-	rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
-	rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+    rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+    rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+    rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
 {
 	return a * Determinant(f, g, h,
 	                       j, k, l,
@@ -759,108 +762,130 @@
 
 // Returns the inverse of a 2x2 matrix.
 std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
-	rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+    rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
 {
 	auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d);
-	return {{s*d, -s*b, -s*c, s*a}};
+	return { { s * d, -s * b, -s * c, s * a } };
 }
 
 // Returns the inverse of a 3x3 matrix.
 std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
-	rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
-	rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+    rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+    rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
 {
 	auto s = sw::SIMD::Float(1.0f) / Determinant(
-			a, b, c,
-			d, e, f,
-			g, h, i); // TODO: duplicate arithmetic calculating the det and below.
+	                                     a, b, c,
+	                                     d, e, f,
+	                                     g, h, i);  // TODO: duplicate arithmetic calculating the det and below.
 
-	return {{
-		s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e),
-		s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f),
-		s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d),
-	}};
+	return { {
+		s * (e * i - f * h),
+		s * (c * h - b * i),
+		s * (b * f - c * e),
+		s * (f * g - d * i),
+		s * (a * i - c * g),
+		s * (c * d - a * f),
+		s * (d * h - e * g),
+		s * (b * g - a * h),
+		s * (a * e - b * d),
+	} };
 }
 
 // Returns the inverse of a 4x4 matrix.
 std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
-	rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
-	rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
-	rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+    rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+    rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+    rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
 {
 	auto s = sw::SIMD::Float(1.0f) / Determinant(
-			a, b, c, d,
-			e, f, g, h,
-			i, j, k, l,
-			m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.
+	                                     a, b, c, d,
+	                                     e, f, g, h,
+	                                     i, j, k, l,
+	                                     m, n, o, p);  // TODO: duplicate arithmetic calculating the det and below.
 
-	auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n;
-	auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n;
-	auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j;
-	auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m;
-	auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i;
-	auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i;
+	auto kplo = k * p - l * o, jpln = j * p - l * n, jokn = j * o - k * n;
+	auto gpho = g * p - h * o, fphn = f * p - h * n, fogn = f * o - g * n;
+	auto glhk = g * l - h * k, flhj = f * l - h * j, fkgj = f * k - g * j;
+	auto iplm = i * p - l * m, iokm = i * o - k * m, ephm = e * p - h * m;
+	auto eogm = e * o - g * m, elhi = e * l - h * i, ekgi = e * k - g * i;
+	auto injm = i * n - j * m, enfm = e * n - f * m, ejfi = e * j - f * i;
 
-	return {{
-		s * ( f * kplo - g * jpln + h * jokn),
+	return { {
+		s * (f * kplo - g * jpln + h * jokn),
 		s * (-b * kplo + c * jpln - d * jokn),
-		s * ( b * gpho - c * fphn + d * fogn),
+		s * (b * gpho - c * fphn + d * fogn),
 		s * (-b * glhk + c * flhj - d * fkgj),
 
 		s * (-e * kplo + g * iplm - h * iokm),
-		s * ( a * kplo - c * iplm + d * iokm),
+		s * (a * kplo - c * iplm + d * iokm),
 		s * (-a * gpho + c * ephm - d * eogm),
-		s * ( a * glhk - c * elhi + d * ekgi),
+		s * (a * glhk - c * elhi + d * ekgi),
 
-		s * ( e * jpln - f * iplm + h * injm),
+		s * (e * jpln - f * iplm + h * injm),
 		s * (-a * jpln + b * iplm - d * injm),
-		s * ( a * fphn - b * ephm + d * enfm),
+		s * (a * fphn - b * ephm + d * enfm),
 		s * (-a * flhj + b * elhi - d * ejfi),
 
 		s * (-e * jokn + f * iokm - g * injm),
-		s * ( a * jokn - b * iokm + c * injm),
+		s * (a * jokn - b * iokm + c * injm),
 		s * (-a * fogn + b * eogm - c * enfm),
-		s * ( a * fkgj - b * ekgi + c * ejfi),
-	}};
+		s * (a * fkgj - b * ekgi + c * ejfi),
+	} };
 }
 
 namespace SIMD {
 
 Pointer::Pointer(rr::Pointer<Byte> base, rr::Int limit)
-	: base(base),
-		dynamicLimit(limit), staticLimit(0),
-		dynamicOffsets(0), staticOffsets{},
-		hasDynamicLimit(true), hasDynamicOffsets(false) {}
+    : base(base)
+    , dynamicLimit(limit)
+    , staticLimit(0)
+    , dynamicOffsets(0)
+    , staticOffsets{}
+    , hasDynamicLimit(true)
+    , hasDynamicOffsets(false)
+{}
 
 Pointer::Pointer(rr::Pointer<Byte> base, unsigned int limit)
-	: base(base),
-		dynamicLimit(0), staticLimit(limit),
-		dynamicOffsets(0), staticOffsets{},
-		hasDynamicLimit(false), hasDynamicOffsets(false) {}
+    : base(base)
+    , dynamicLimit(0)
+    , staticLimit(limit)
+    , dynamicOffsets(0)
+    , staticOffsets{}
+    , hasDynamicLimit(false)
+    , hasDynamicOffsets(false)
+{}
 
 Pointer::Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
-	: base(base),
-		dynamicLimit(limit), staticLimit(0),
-		dynamicOffsets(offset), staticOffsets{},
-		hasDynamicLimit(true), hasDynamicOffsets(true) {}
+    : base(base)
+    , dynamicLimit(limit)
+    , staticLimit(0)
+    , dynamicOffsets(offset)
+    , staticOffsets{}
+    , hasDynamicLimit(true)
+    , hasDynamicOffsets(true)
+{}
 
 Pointer::Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
-	: base(base),
-		dynamicLimit(0), staticLimit(limit),
-		dynamicOffsets(offset), staticOffsets{},
-		hasDynamicLimit(false), hasDynamicOffsets(true) {}
+    : base(base)
+    , dynamicLimit(0)
+    , staticLimit(limit)
+    , dynamicOffsets(offset)
+    , staticOffsets{}
+    , hasDynamicLimit(false)
+    , hasDynamicOffsets(true)
+{}
 
-Pointer& Pointer::operator += (Int i)
+Pointer &Pointer::operator+=(Int i)
 {
 	dynamicOffsets += i;
 	hasDynamicOffsets = true;
 	return *this;
 }
 
-Pointer& Pointer::operator *= (Int i)
+Pointer &Pointer::operator*=(Int i)
 {
 	dynamicOffsets = offsets() * i;
 	staticOffsets = {};
@@ -868,16 +893,26 @@
 	return *this;
 }
 
-Pointer Pointer::operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; }
-Pointer Pointer::operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; }
+Pointer Pointer::operator+(SIMD::Int i)
+{
+	Pointer p = *this;
+	p += i;
+	return p;
+}
+Pointer Pointer::operator*(SIMD::Int i)
+{
+	Pointer p = *this;
+	p *= i;
+	return p;
+}
 
-Pointer& Pointer::operator += (int i)
+Pointer &Pointer::operator+=(int i)
 {
 	for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
 	return *this;
 }
 
-Pointer& Pointer::operator *= (int i)
+Pointer &Pointer::operator*=(int i)
 {
 	for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; }
 	if(hasDynamicOffsets)
@@ -887,8 +922,18 @@
 	return *this;
 }
 
-Pointer Pointer::operator + (int i) { Pointer p = *this; p += i; return p; }
-Pointer Pointer::operator * (int i) { Pointer p = *this; p *= i; return p; }
+Pointer Pointer::operator+(int i)
+{
+	Pointer p = *this;
+	p += i;
+	return p;
+}
+Pointer Pointer::operator*(int i)
+{
+	Pointer p = *this;
+	p *= i;
+	return p;
+}
 
 SIMD::Int Pointer::offsets() const
 {
@@ -910,10 +955,10 @@
 		// Common fast paths.
 		static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
 		return SIMD::Int(
-			(staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
-			(staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
-			(staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
-			(staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
+		    (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
+		    (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
+		    (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
+		    (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
 	}
 
 	return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
@@ -932,14 +977,14 @@
 		{
 			switch(robustness)
 			{
-			case OutOfBoundsBehavior::UndefinedBehavior:
-				// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
-				// but since it can't know in advance which branches are taken this must be true even for inactives lanes.
-				return true;
-			case OutOfBoundsBehavior::Nullify:
-			case OutOfBoundsBehavior::RobustBufferAccess:
-			case OutOfBoundsBehavior::UndefinedValue:
-				return false;
+				case OutOfBoundsBehavior::UndefinedBehavior:
+					// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
+					// but since it can't know in advance which branches are taken this must be true even for inactives lanes.
+					return true;
+				case OutOfBoundsBehavior::Nullify:
+				case OutOfBoundsBehavior::RobustBufferAccess:
+				case OutOfBoundsBehavior::UndefinedValue:
+					return false;
 			}
 		}
 	}
@@ -968,7 +1013,7 @@
 	{
 		auto o = offsets();
 		static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
-		return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 2*step, 3*step, 0))) == 0;
+		return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1 * step, 2 * step, 3 * step, 0))) == 0;
 	}
 	return hasStaticSequentialOffsets(step);
 }
@@ -983,7 +1028,7 @@
 	}
 	for(int i = 1; i < SIMD::Width; i++)
 	{
-		if(staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; }
+		if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i]) { return false; }
 	}
 	return true;
 }
@@ -1010,7 +1055,7 @@
 	}
 	for(int i = 1; i < SIMD::Width; i++)
 	{
-		if(staticOffsets[i-1] != staticOffsets[i]) { return false; }
+		if(staticOffsets[i - 1] != staticOffsets[i]) { return false; }
 	}
 	return true;
 }
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index 4026d27..a911be2 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -19,9 +19,9 @@
 #include "Reactor/Reactor.hpp"
 #include "Vulkan/VkDebug.hpp"
 
-#include <atomic> // std::memory_order
 #include <array>
-#include <utility> // std::pair
+#include <atomic>   // std::memory_order
+#include <utility>  // std::pair
 
 namespace sw {
 
@@ -87,17 +87,17 @@
 	Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset);
 	Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset);
 
-	Pointer& operator += (Int i);
-	Pointer& operator *= (Int i);
+	Pointer &operator+=(Int i);
+	Pointer &operator*=(Int i);
 
-	Pointer operator + (SIMD::Int i);
-	Pointer operator * (SIMD::Int i);
+	Pointer operator+(SIMD::Int i);
+	Pointer operator*(SIMD::Int i);
 
-	Pointer& operator += (int i);
-	Pointer& operator *= (int i);
+	Pointer &operator+=(int i);
+	Pointer &operator*=(int i);
 
-	Pointer operator + (int i);
-	Pointer operator * (int i);
+	Pointer operator+(int i);
+	Pointer operator*(int i);
 
 	SIMD::Int offsets() const;
 
@@ -135,23 +135,37 @@
 	rr::Pointer<rr::Byte> base;
 
 	// Upper (non-inclusive) limit for offsets from base.
-	rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
+	rr::Int dynamicLimit;  // If hasDynamicLimit is false, dynamicLimit is zero.
 	unsigned int staticLimit;
 
 	// Per lane offsets from base.
-	SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
+	SIMD::Int dynamicOffsets;  // If hasDynamicOffsets is false, all dynamicOffsets are zero.
 	std::array<int32_t, SIMD::Width> staticOffsets;
 
 	bool hasDynamicLimit;    // True if dynamicLimit is non-zero.
 	bool hasDynamicOffsets;  // True if any dynamicOffsets are non-zero.
 };
 
-template <typename T> struct Element {};
-template <> struct Element<Float> { using type = rr::Float; };
-template <> struct Element<Int>   { using type = rr::Int; };
-template <> struct Element<UInt>  { using type = rr::UInt; };
+template<typename T>
+struct Element
+{};
+template<>
+struct Element<Float>
+{
+	using type = rr::Float;
+};
+template<>
+struct Element<Int>
+{
+	using type = rr::Int;
+};
+template<>
+struct Element<UInt>
+{
+	using type = rr::UInt;
+};
 
-} // namespace SIMD
+}  // namespace SIMD
 
 Float4 exponential2(RValue<Float4> x, bool pp = false);
 Float4 logarithm2(RValue<Float4> x, bool pp = false);
@@ -161,8 +175,8 @@
 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
 Float4 modulo(RValue<Float4> x, RValue<Float4> y);
-Float4 sine_pi(RValue<Float4> x, bool pp = false);     // limited to [-pi, pi] range
-Float4 cosine_pi(RValue<Float4> x, bool pp = false);   // limited to [-pi, pi] range
+Float4 sine_pi(RValue<Float4> x, bool pp = false);    // limited to [-pi, pi] range
+Float4 cosine_pi(RValue<Float4> x, bool pp = false);  // limited to [-pi, pi] range
 Float4 sine(RValue<Float4> x, bool pp = false);
 Float4 cosine(RValue<Float4> x, bool pp = false);
 Float4 tangent(RValue<Float4> x, bool pp = false);
@@ -199,10 +213,10 @@
 
 rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints);
 
-template <typename T>
+template<typename T>
 inline rr::RValue<T> AndAll(rr::RValue<T> const &mask);
 
-template <typename T>
+template<typename T>
 inline rr::RValue<T> OrAll(rr::RValue<T> const &mask);
 
 rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val);
@@ -224,9 +238,9 @@
 
 // Performs a fused-multiply add, returning a * b + c.
 rr::RValue<sw::SIMD::Float> FMA(
-		rr::RValue<sw::SIMD::Float> const &a,
-		rr::RValue<sw::SIMD::Float> const &b,
-		rr::RValue<sw::SIMD::Float> const &c);
+    rr::RValue<sw::SIMD::Float> const &a,
+    rr::RValue<sw::SIMD::Float> const &b,
+    rr::RValue<sw::SIMD::Float> const &c);
 
 // Returns the exponent of the floating point number f.
 // Assumes IEEE 754
@@ -244,39 +258,39 @@
 
 // Returns the determinant of a 2x2 matrix.
 rr::RValue<sw::SIMD::Float> Determinant(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
-	rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+    rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
 
 // Returns the determinant of a 3x3 matrix.
 rr::RValue<sw::SIMD::Float> Determinant(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
-	rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
-	rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+    rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+    rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
 
 // Returns the determinant of a 4x4 matrix.
 rr::RValue<sw::SIMD::Float> Determinant(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
-	rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
-	rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
-	rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+    rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+    rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+    rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
 
 // Returns the inverse of a 2x2 matrix.
 std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
-	rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+    rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
 
 // Returns the inverse of a 3x3 matrix.
 std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
-	rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
-	rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+    rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+    rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
 
 // Returns the inverse of a 4x4 matrix.
 std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
-	rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
-	rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
-	rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
-	rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
+    rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+    rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+    rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+    rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
 
 ////////////////////////////////////////////////////////////////////////////
 // Inline functions
@@ -307,14 +321,14 @@
 	{
 		switch(robustness)
 		{
-		case OutOfBoundsBehavior::Nullify:
-		case OutOfBoundsBehavior::RobustBufferAccess:
-		case OutOfBoundsBehavior::UndefinedValue:
-			mask &= isInBounds(sizeof(float), robustness);  // Disable out-of-bounds reads.
-			break;
-		case OutOfBoundsBehavior::UndefinedBehavior:
-			// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
-			break;
+			case OutOfBoundsBehavior::Nullify:
+			case OutOfBoundsBehavior::RobustBufferAccess:
+			case OutOfBoundsBehavior::UndefinedValue:
+				mask &= isInBounds(sizeof(float), robustness);  // Disable out-of-bounds reads.
+				break;
+			case OutOfBoundsBehavior::UndefinedBehavior:
+				// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+				break;
 		}
 	}
 
@@ -339,14 +353,14 @@
 		bool zeroMaskedLanes = true;
 		switch(robustness)
 		{
-		case OutOfBoundsBehavior::Nullify:
-		case OutOfBoundsBehavior::RobustBufferAccess:  // Must either return an in-bounds value, or zero.
-			zeroMaskedLanes = true;
-			break;
-		case OutOfBoundsBehavior::UndefinedValue:
-		case OutOfBoundsBehavior::UndefinedBehavior:
-			zeroMaskedLanes = false;
-			break;
+			case OutOfBoundsBehavior::Nullify:
+			case OutOfBoundsBehavior::RobustBufferAccess:  // Must either return an in-bounds value, or zero.
+				zeroMaskedLanes = true;
+				break;
+			case OutOfBoundsBehavior::UndefinedValue:
+			case OutOfBoundsBehavior::UndefinedBehavior:
+				zeroMaskedLanes = false;
+				break;
 		}
 
 		if(hasStaticSequentialOffsets(sizeof(float)))
@@ -399,14 +413,14 @@
 
 	switch(robustness)
 	{
-	case OutOfBoundsBehavior::Nullify:
-	case OutOfBoundsBehavior::RobustBufferAccess:  // TODO: Allows writing anywhere within bounds. Could be faster than masking.
-	case OutOfBoundsBehavior::UndefinedValue:  // Should not be used for store operations. Treat as robust buffer access.
-		mask &= isInBounds(sizeof(float), robustness);  // Disable out-of-bounds writes.
-		break;
-	case OutOfBoundsBehavior::UndefinedBehavior:
-		// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
-		break;
+		case OutOfBoundsBehavior::Nullify:
+		case OutOfBoundsBehavior::RobustBufferAccess:       // TODO: Allows writing anywhere within bounds. Could be faster than masking.
+		case OutOfBoundsBehavior::UndefinedValue:           // Should not be used for store operations. Treat as robust buffer access.
+			mask &= isInBounds(sizeof(float), robustness);  // Disable out-of-bounds writes.
+			break;
+		case OutOfBoundsBehavior::UndefinedBehavior:
+			// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+			break;
 	}
 
 	if(!atomic && order == std::memory_order_relaxed)
@@ -420,9 +434,9 @@
 				auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx));
 				auto maskedVal = As<SIMD::Int>(val) & elect;
 				auto scalarVal = Extract(maskedVal, 0) |
-					Extract(maskedVal, 1) |
-					Extract(maskedVal, 2) |
-					Extract(maskedVal, 3);
+				                 Extract(maskedVal, 1) |
+				                 Extract(maskedVal, 2) |
+				                 Extract(maskedVal, 3);
 				*rr::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal);
 			}
 		}
@@ -476,29 +490,30 @@
 	Store(T(val), robustness, mask, atomic, order);
 }
 
-template <typename T>
+template<typename T>
 inline rr::RValue<T> AndAll(rr::RValue<T> const &mask)
 {
-	T v1 = mask;              // [x]    [y]    [z]    [w]
-	T v2 = v1.xzxz & v1.ywyw; // [xy]   [zw]   [xy]   [zw]
-	return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
+	T v1 = mask;               // [x]    [y]    [z]    [w]
+	T v2 = v1.xzxz & v1.ywyw;  // [xy]   [zw]   [xy]   [zw]
+	return v2.xxxx & v2.yyyy;  // [xyzw] [xyzw] [xyzw] [xyzw]
 }
 
-template <typename T>
+template<typename T>
 inline rr::RValue<T> OrAll(rr::RValue<T> const &mask)
 {
-	T v1 = mask;              // [x]    [y]    [z]    [w]
-	T v2 = v1.xzxz | v1.ywyw; // [xy]   [zw]   [xy]   [zw]
-	return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
+	T v1 = mask;               // [x]    [y]    [z]    [w]
+	T v2 = v1.xzxz | v1.ywyw;  // [xy]   [zw]   [xy]   [zw]
+	return v2.xxxx | v2.yyyy;  // [xyzw] [xyzw] [xyzw] [xyzw]
 }
 
-} // namespace sw
+}  // namespace sw
 
 #ifdef ENABLE_RR_PRINT
 namespace rr {
-template <> struct PrintValue::Ty<sw::Vector4f>
+template<>
+struct PrintValue::Ty<sw::Vector4f>
 {
-	static std::string fmt(const sw::Vector4f& v)
+	static std::string fmt(const sw::Vector4f &v)
 	{
 		return "[x: " + PrintValue::fmt(v.x) +
 		       ", y: " + PrintValue::fmt(v.y) +
@@ -506,14 +521,15 @@
 		       ", w: " + PrintValue::fmt(v.w) + "]";
 	}
 
-	static std::vector<rr::Value*> val(const sw::Vector4f& v)
+	static std::vector<rr::Value *> val(const sw::Vector4f &v)
 	{
 		return PrintValue::vals(v.x, v.y, v.z, v.w);
 	}
 };
-template <> struct PrintValue::Ty<sw::Vector4s>
+template<>
+struct PrintValue::Ty<sw::Vector4s>
 {
-	static std::string fmt(const sw::Vector4s& v)
+	static std::string fmt(const sw::Vector4s &v)
 	{
 		return "[x: " + PrintValue::fmt(v.x) +
 		       ", y: " + PrintValue::fmt(v.y) +
@@ -521,13 +537,13 @@
 		       ", w: " + PrintValue::fmt(v.w) + "]";
 	}
 
-	static std::vector<rr::Value*> val(const sw::Vector4s& v)
+	static std::vector<rr::Value *> val(const sw::Vector4s &v)
 	{
 		return PrintValue::vals(v.x, v.y, v.z, v.w);
 	}
 };
 
-}  // namespace sw
-#endif // ENABLE_RR_PRINT
+}  // namespace rr
+#endif  // ENABLE_RR_PRINT
 
-#endif   // sw_ShaderCore_hpp
+#endif  // sw_ShaderCore_hpp
diff --git a/src/Pipeline/SpirvID.hpp b/src/Pipeline/SpirvID.hpp
index 1f4624d..35b910a 100644
--- a/src/Pipeline/SpirvID.hpp
+++ b/src/Pipeline/SpirvID.hpp
@@ -15,8 +15,8 @@
 #ifndef sw_ID_hpp
 #define sw_ID_hpp
 
-#include <unordered_map>
 #include <cstdint>
+#include <unordered_map>
 
 namespace sw {
 
@@ -25,39 +25,43 @@
 // ID; instead it is used to prevent implicit casts between identifiers of
 // different T types.
 // IDs are typically used as a map key to value of type T.
-template <typename T>
+template<typename T>
 class SpirvID
 {
 public:
-	SpirvID() : id(0) {}
-	SpirvID(uint32_t id) : id(id) {}
-	bool operator == (const SpirvID<T>& rhs) const { return id == rhs.id; }
-	bool operator != (const SpirvID<T>& rhs) const { return id != rhs.id; }
-	bool operator < (const SpirvID<T>& rhs) const { return id < rhs.id; }
+	SpirvID()
+	    : id(0)
+	{}
+	SpirvID(uint32_t id)
+	    : id(id)
+	{}
+	bool operator==(const SpirvID<T> &rhs) const { return id == rhs.id; }
+	bool operator!=(const SpirvID<T> &rhs) const { return id != rhs.id; }
+	bool operator<(const SpirvID<T> &rhs) const { return id < rhs.id; }
 
 	// value returns the numerical value of the identifier.
 	uint32_t value() const { return id; }
+
 private:
 	uint32_t id;
 };
 
 // HandleMap<T> is an unordered map of SpirvID<T> to T.
-template <typename T>
+template<typename T>
 using HandleMap = std::unordered_map<SpirvID<T>, T>;
-}
+}  // namespace sw
 
-namespace std
-{
+namespace std {
 // std::hash implementation for sw::SpirvID<T>
 template<typename T>
-struct hash< sw::SpirvID<T> >
+struct hash<sw::SpirvID<T> >
 {
-	std::size_t operator()(const sw::SpirvID<T>& id) const noexcept
+	std::size_t operator()(const sw::SpirvID<T> &id) const noexcept
 	{
 		return std::hash<uint32_t>()(id.value());
 	}
 };
 
-}  // namespace sw
+}  // namespace std
 
 #endif  // sw_ID_hpp
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 11b3770..2350195 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -23,17 +23,18 @@
 namespace sw {
 
 SpirvShader::SpirvShader(
-		uint32_t codeSerialID,
-		VkShaderStageFlagBits pipelineStage,
-		const char *entryPointName,
-		InsnStore const &insns,
-		const vk::RenderPass *renderPass,
-		uint32_t subpassIndex,
-		bool robustBufferAccess)
-			: insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
-			  outputs{MAX_INTERFACE_COMPONENTS},
-			  codeSerialID(codeSerialID),
-			  robustBufferAccess(robustBufferAccess)
+    uint32_t codeSerialID,
+    VkShaderStageFlagBits pipelineStage,
+    const char *entryPointName,
+    InsnStore const &insns,
+    const vk::RenderPass *renderPass,
+    uint32_t subpassIndex,
+    bool robustBufferAccess)
+    : insns{ insns }
+    , inputs{ MAX_INTERFACE_COMPONENTS }
+    , outputs{ MAX_INTERFACE_COMPONENTS }
+    , codeSerialID(codeSerialID)
+    , robustBufferAccess(robustBufferAccess)
 {
 	ASSERT(insns.size() > 0);
 
@@ -46,7 +47,8 @@
 		{
 			auto attachmentIndex = subpass.pInputAttachments[i].attachment;
 			inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
-											 ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED);
+			                                     ? renderPass->getAttachment(attachmentIndex).format
+			                                     : VK_FORMAT_UNDEFINED);
 		}
 	}
 
@@ -63,403 +65,404 @@
 
 		switch(opcode)
 		{
-		case spv::OpEntryPoint:
-		{
-			executionModel = spv::ExecutionModel(insn.word(1));
-			auto id = Function::ID(insn.word(2));
-			auto name = insn.string(3);
-			auto stage = executionModelToStage(executionModel);
-			if(stage == pipelineStage && strcmp(name, entryPointName) == 0)
+			case spv::OpEntryPoint:
 			{
-				ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
-				entryPoint = id;
-			}
-			break;
-		}
-
-		case spv::OpExecutionMode:
-			ProcessExecutionMode(insn);
-			break;
-
-		case spv::OpDecorate:
-		{
-			TypeOrObjectID targetId = insn.word(1);
-			auto decoration = static_cast<spv::Decoration>(insn.word(2));
-			uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
-
-			decorations[targetId].Apply(decoration, value);
-
-			switch(decoration)
-			{
-			case spv::DecorationDescriptorSet:
-				descriptorDecorations[targetId].DescriptorSet = value;
-				break;
-			case spv::DecorationBinding:
-				descriptorDecorations[targetId].Binding = value;
-				break;
-			case spv::DecorationInputAttachmentIndex:
-				descriptorDecorations[targetId].InputAttachmentIndex = value;
-				break;
-			default:
-				// Only handling descriptor decorations here.
+				executionModel = spv::ExecutionModel(insn.word(1));
+				auto id = Function::ID(insn.word(2));
+				auto name = insn.string(3);
+				auto stage = executionModelToStage(executionModel);
+				if(stage == pipelineStage && strcmp(name, entryPointName) == 0)
+				{
+					ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
+					entryPoint = id;
+				}
 				break;
 			}
 
-			if(decoration == spv::DecorationCentroid)
-				modes.NeedsCentroid = true;
-			break;
-		}
+			case spv::OpExecutionMode:
+				ProcessExecutionMode(insn);
+				break;
 
-		case spv::OpMemberDecorate:
-		{
-			Type::ID targetId = insn.word(1);
-			auto memberIndex = insn.word(2);
-			auto decoration = static_cast<spv::Decoration>(insn.word(3));
-			uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
-
-			auto &d = memberDecorations[targetId];
-			if(memberIndex >= d.size())
-				d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
-
-			d[memberIndex].Apply(decoration, value);
-
-			if(decoration == spv::DecorationCentroid)
-				modes.NeedsCentroid = true;
-			break;
-		}
-
-		case spv::OpDecorationGroup:
-			// Nothing to do here. We don't need to record the definition of the group; we'll just have
-			// the bundle of decorations float around. If we were to ever walk the decorations directly,
-			// we might think about introducing this as a real Object.
-			break;
-
-		case spv::OpGroupDecorate:
-		{
-			uint32_t group = insn.word(1);
-			auto const &groupDecorations = decorations[group];
-			auto const &descriptorGroupDecorations = descriptorDecorations[group];
-			for(auto i = 2u; i < insn.wordCount(); i++)
+			case spv::OpDecorate:
 			{
-				// Remaining operands are targets to apply the group to.
-				uint32_t target = insn.word(i);
-				decorations[target].Apply(groupDecorations);
-				descriptorDecorations[target].Apply(descriptorGroupDecorations);
+				TypeOrObjectID targetId = insn.word(1);
+				auto decoration = static_cast<spv::Decoration>(insn.word(2));
+				uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
+
+				decorations[targetId].Apply(decoration, value);
+
+				switch(decoration)
+				{
+					case spv::DecorationDescriptorSet:
+						descriptorDecorations[targetId].DescriptorSet = value;
+						break;
+					case spv::DecorationBinding:
+						descriptorDecorations[targetId].Binding = value;
+						break;
+					case spv::DecorationInputAttachmentIndex:
+						descriptorDecorations[targetId].InputAttachmentIndex = value;
+						break;
+					default:
+						// Only handling descriptor decorations here.
+						break;
+				}
+
+				if(decoration == spv::DecorationCentroid)
+					modes.NeedsCentroid = true;
+				break;
 			}
 
-			break;
-		}
-
-		case spv::OpGroupMemberDecorate:
-		{
-			auto const &srcDecorations = decorations[insn.word(1)];
-			for(auto i = 2u; i < insn.wordCount(); i += 2)
+			case spv::OpMemberDecorate:
 			{
-				// remaining operands are pairs of <id>, literal for members to apply to.
-				auto &d = memberDecorations[insn.word(i)];
-				auto memberIndex = insn.word(i + 1);
+				Type::ID targetId = insn.word(1);
+				auto memberIndex = insn.word(2);
+				auto decoration = static_cast<spv::Decoration>(insn.word(3));
+				uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
+
+				auto &d = memberDecorations[targetId];
 				if(memberIndex >= d.size())
-					d.resize(memberIndex + 1);    // on demand resize, see above...
-				d[memberIndex].Apply(srcDecorations);
+					d.resize(memberIndex + 1);  // on demand; exact size would require another pass...
+
+				d[memberIndex].Apply(decoration, value);
+
+				if(decoration == spv::DecorationCentroid)
+					modes.NeedsCentroid = true;
+				break;
 			}
-			break;
-		}
 
-		case spv::OpLabel:
-		{
-			ASSERT(currentBlock.value() == 0);
-			currentBlock = Block::ID(insn.word(1));
-			blockStart = insn;
-			break;
-		}
-
-		// Branch Instructions (subset of Termination Instructions):
-		case spv::OpBranch:
-		case spv::OpBranchConditional:
-		case spv::OpSwitch:
-		case spv::OpReturn:
-		// fallthrough
-
-		// Termination instruction:
-		case spv::OpKill:
-		case spv::OpUnreachable:
-		{
-			ASSERT(currentBlock.value() != 0);
-			ASSERT(currentFunction.value() != 0);
-
-			auto blockEnd = insn; blockEnd++;
-			functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
-			currentBlock = Block::ID(0);
-
-			if(opcode == spv::OpKill)
-			{
-				modes.ContainsKill = true;
-			}
-			break;
-		}
-
-		case spv::OpLoopMerge:
-		case spv::OpSelectionMerge:
-			break; // Nothing to do in analysis pass.
-
-		case spv::OpTypeVoid:
-		case spv::OpTypeBool:
-		case spv::OpTypeInt:
-		case spv::OpTypeFloat:
-		case spv::OpTypeVector:
-		case spv::OpTypeMatrix:
-		case spv::OpTypeImage:
-		case spv::OpTypeSampler:
-		case spv::OpTypeSampledImage:
-		case spv::OpTypeArray:
-		case spv::OpTypeRuntimeArray:
-		case spv::OpTypeStruct:
-		case spv::OpTypePointer:
-		case spv::OpTypeFunction:
-			DeclareType(insn);
-			break;
-
-		case spv::OpVariable:
-		{
-			Type::ID typeId = insn.word(1);
-			Object::ID resultId = insn.word(2);
-			auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
-
-			auto &object = defs[resultId];
-			object.kind = Object::Kind::Pointer;
-			object.definition = insn;
-			object.type = typeId;
-
-			ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
-			ASSERT(getType(typeId).storageClass == storageClass);
-
-			switch(storageClass)
-			{
-			case spv::StorageClassInput:
-			case spv::StorageClassOutput:
-				ProcessInterfaceVariable(object);
+			case spv::OpDecorationGroup:
+				// Nothing to do here. We don't need to record the definition of the group; we'll just have
+				// the bundle of decorations float around. If we were to ever walk the decorations directly,
+				// we might think about introducing this as a real Object.
 				break;
 
-			case spv::StorageClassUniform:
-			case spv::StorageClassStorageBuffer:
-				object.kind = Object::Kind::DescriptorSet;
+			case spv::OpGroupDecorate:
+			{
+				uint32_t group = insn.word(1);
+				auto const &groupDecorations = decorations[group];
+				auto const &descriptorGroupDecorations = descriptorDecorations[group];
+				for(auto i = 2u; i < insn.wordCount(); i++)
+				{
+					// Remaining operands are targets to apply the group to.
+					uint32_t target = insn.word(i);
+					decorations[target].Apply(groupDecorations);
+					descriptorDecorations[target].Apply(descriptorGroupDecorations);
+				}
+
+				break;
+			}
+
+			case spv::OpGroupMemberDecorate:
+			{
+				auto const &srcDecorations = decorations[insn.word(1)];
+				for(auto i = 2u; i < insn.wordCount(); i += 2)
+				{
+					// remaining operands are pairs of <id>, literal for members to apply to.
+					auto &d = memberDecorations[insn.word(i)];
+					auto memberIndex = insn.word(i + 1);
+					if(memberIndex >= d.size())
+						d.resize(memberIndex + 1);  // on demand resize, see above...
+					d[memberIndex].Apply(srcDecorations);
+				}
+				break;
+			}
+
+			case spv::OpLabel:
+			{
+				ASSERT(currentBlock.value() == 0);
+				currentBlock = Block::ID(insn.word(1));
+				blockStart = insn;
+				break;
+			}
+
+			// Branch Instructions (subset of Termination Instructions):
+			case spv::OpBranch:
+			case spv::OpBranchConditional:
+			case spv::OpSwitch:
+			case spv::OpReturn:
+			// fallthrough
+
+			// Termination instruction:
+			case spv::OpKill:
+			case spv::OpUnreachable:
+			{
+				ASSERT(currentBlock.value() != 0);
+				ASSERT(currentFunction.value() != 0);
+
+				auto blockEnd = insn;
+				blockEnd++;
+				functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
+				currentBlock = Block::ID(0);
+
+				if(opcode == spv::OpKill)
+				{
+					modes.ContainsKill = true;
+				}
+				break;
+			}
+
+			case spv::OpLoopMerge:
+			case spv::OpSelectionMerge:
+				break;  // Nothing to do in analysis pass.
+
+			case spv::OpTypeVoid:
+			case spv::OpTypeBool:
+			case spv::OpTypeInt:
+			case spv::OpTypeFloat:
+			case spv::OpTypeVector:
+			case spv::OpTypeMatrix:
+			case spv::OpTypeImage:
+			case spv::OpTypeSampler:
+			case spv::OpTypeSampledImage:
+			case spv::OpTypeArray:
+			case spv::OpTypeRuntimeArray:
+			case spv::OpTypeStruct:
+			case spv::OpTypePointer:
+			case spv::OpTypeFunction:
+				DeclareType(insn);
 				break;
 
-			case spv::StorageClassPushConstant:
-			case spv::StorageClassPrivate:
-			case spv::StorageClassFunction:
-			case spv::StorageClassUniformConstant:
-				break; // Correctly handled.
-
-			case spv::StorageClassWorkgroup:
+			case spv::OpVariable:
 			{
-				auto &elTy = getType(getType(typeId).element);
-				auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float));
-				workgroupMemory.allocate(resultId, sizeInBytes);
+				Type::ID typeId = insn.word(1);
+				Object::ID resultId = insn.word(2);
+				auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
+
+				auto &object = defs[resultId];
 				object.kind = Object::Kind::Pointer;
-				break;
-			}
-			case spv::StorageClassAtomicCounter:
-			case spv::StorageClassImage:
-				UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
-				break;
+				object.definition = insn;
+				object.type = typeId;
 
-			case spv::StorageClassCrossWorkgroup:
-				UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
-				break;
+				ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
+				ASSERT(getType(typeId).storageClass == storageClass);
 
-			case spv::StorageClassGeneric:
-				UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
-				break;
-
-			default:
-				UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
-				break;
-			}
-			break;
-		}
-
-		case spv::OpConstant:
-		case spv::OpSpecConstant:
-			CreateConstant(insn).constantValue[0] = insn.word(3);
-			break;
-		case spv::OpConstantFalse:
-		case spv::OpSpecConstantFalse:
-			CreateConstant(insn).constantValue[0] = 0;    // Represent Boolean false as zero.
-			break;
-		case spv::OpConstantTrue:
-		case spv::OpSpecConstantTrue:
-			CreateConstant(insn).constantValue[0] = ~0u;  // Represent Boolean true as all bits set.
-			break;
-		case spv::OpConstantNull:
-		case spv::OpUndef:
-		{
-			// TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
-			// OpConstantNull forms a constant of arbitrary type, all zeros.
-			auto &object = CreateConstant(insn);
-			auto &objectTy = getType(object.type);
-			for(auto i = 0u; i < objectTy.sizeInComponents; i++)
-			{
-				object.constantValue[i] = 0;
-			}
-			break;
-		}
-		case spv::OpConstantComposite:
-		case spv::OpSpecConstantComposite:
-		{
-			auto &object = CreateConstant(insn);
-			auto offset = 0u;
-			for(auto i = 0u; i < insn.wordCount() - 3; i++)
-			{
-				auto &constituent = getObject(insn.word(i + 3));
-				auto &constituentTy = getType(constituent.type);
-				for(auto j = 0u; j < constituentTy.sizeInComponents; j++)
+				switch(storageClass)
 				{
-					object.constantValue[offset++] = constituent.constantValue[j];
+					case spv::StorageClassInput:
+					case spv::StorageClassOutput:
+						ProcessInterfaceVariable(object);
+						break;
+
+					case spv::StorageClassUniform:
+					case spv::StorageClassStorageBuffer:
+						object.kind = Object::Kind::DescriptorSet;
+						break;
+
+					case spv::StorageClassPushConstant:
+					case spv::StorageClassPrivate:
+					case spv::StorageClassFunction:
+					case spv::StorageClassUniformConstant:
+						break;  // Correctly handled.
+
+					case spv::StorageClassWorkgroup:
+					{
+						auto &elTy = getType(getType(typeId).element);
+						auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float));
+						workgroupMemory.allocate(resultId, sizeInBytes);
+						object.kind = Object::Kind::Pointer;
+						break;
+					}
+					case spv::StorageClassAtomicCounter:
+					case spv::StorageClassImage:
+						UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
+						break;
+
+					case spv::StorageClassCrossWorkgroup:
+						UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
+						break;
+
+					case spv::StorageClassGeneric:
+						UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
+						break;
+
+					default:
+						UNREACHABLE("Unexpected StorageClass %d", storageClass);  // See Appendix A of the Vulkan spec.
+						break;
 				}
+				break;
 			}
 
-			auto objectId = Object::ID(insn.word(2));
-			auto decorationsIt = decorations.find(objectId);
-			if(decorationsIt != decorations.end() &&
-				decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
+			case spv::OpConstant:
+			case spv::OpSpecConstant:
+				CreateConstant(insn).constantValue[0] = insn.word(3);
+				break;
+			case spv::OpConstantFalse:
+			case spv::OpSpecConstantFalse:
+				CreateConstant(insn).constantValue[0] = 0;  // Represent Boolean false as zero.
+				break;
+			case spv::OpConstantTrue:
+			case spv::OpSpecConstantTrue:
+				CreateConstant(insn).constantValue[0] = ~0u;  // Represent Boolean true as all bits set.
+				break;
+			case spv::OpConstantNull:
+			case spv::OpUndef:
 			{
-				// https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
-				// Decorating an object with the WorkgroupSize built-in
-				// decoration will make that object contain the dimensions
-				// of a local workgroup. If an object is decorated with the
-				// WorkgroupSize decoration, this must take precedence over
-				// any execution mode set for LocalSize.
-				// The object decorated with WorkgroupSize must be declared
-				// as a three-component vector of 32-bit integers.
-				ASSERT(getType(object.type).sizeInComponents == 3);
-				modes.WorkgroupSizeX = object.constantValue[0];
-				modes.WorkgroupSizeY = object.constantValue[1];
-				modes.WorkgroupSizeZ = object.constantValue[2];
-			}
-			break;
-		}
-		case spv::OpSpecConstantOp:
-			EvalSpecConstantOp(insn);
-			break;
-
-		case spv::OpCapability:
-		{
-			auto capability = static_cast<spv::Capability>(insn.word(1));
-			switch(capability)
-			{
-			case spv::CapabilityMatrix: capabilities.Matrix = true; break;
-			case spv::CapabilityShader: capabilities.Shader = true; break;
-			case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break;
-			case spv::CapabilityCullDistance: capabilities.CullDistance = true; break;
-			case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
-			case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
-			case spv::CapabilityImage1D: capabilities.Image1D = true; break;
-			case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break;
-			case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
-			case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break;
-			case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
-			case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
-			case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
-			case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
-			case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
-			case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
-			case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break;
-			case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
-			case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
-			case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
-			case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
-			case spv::CapabilityMultiView: capabilities.MultiView = true; break;
-			default:
-				UNSUPPORTED("Unsupported capability %u", insn.word(1));
-			}
-			break; // Various capabilities will be declared, but none affect our code generation at this point.
-		}
-
-		case spv::OpMemoryModel:
-			break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
-
-		case spv::OpFunction:
-		{
-			auto functionId = Function::ID(insn.word(2));
-			ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
-			currentFunction = functionId;
-			auto &function = functions[functionId];
-			function.result = Type::ID(insn.word(1));
-			function.type = Type::ID(insn.word(4));
-			// Scan forward to find the function's label.
-			for(auto it = insn; it != end() && function.entry == 0; it++)
-			{
-				switch(it.opcode())
+				// TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
+				// OpConstantNull forms a constant of arbitrary type, all zeros.
+				auto &object = CreateConstant(insn);
+				auto &objectTy = getType(object.type);
+				for(auto i = 0u; i < objectTy.sizeInComponents; i++)
 				{
-				case spv::OpFunction:
-				case spv::OpFunctionParameter:
-					break;
-				case spv::OpLabel:
-					function.entry = Block::ID(it.word(1));
-					break;
-				default:
-					WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
+					object.constantValue[i] = 0;
 				}
+				break;
 			}
-			ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
-			break;
-		}
-
-		case spv::OpFunctionEnd:
-			currentFunction = 0;
-			break;
-
-		case spv::OpExtInstImport:
-		{
-			// We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
-			// Valid shaders will not attempt to import any other instruction sets.
-			auto ext = insn.string(2);
-			if(0 != strcmp("GLSL.std.450", ext))
+			case spv::OpConstantComposite:
+			case spv::OpSpecConstantComposite:
 			{
-				UNSUPPORTED("SPIR-V Extension: %s", ext);
+				auto &object = CreateConstant(insn);
+				auto offset = 0u;
+				for(auto i = 0u; i < insn.wordCount() - 3; i++)
+				{
+					auto &constituent = getObject(insn.word(i + 3));
+					auto &constituentTy = getType(constituent.type);
+					for(auto j = 0u; j < constituentTy.sizeInComponents; j++)
+					{
+						object.constantValue[offset++] = constituent.constantValue[j];
+					}
+				}
+
+				auto objectId = Object::ID(insn.word(2));
+				auto decorationsIt = decorations.find(objectId);
+				if(decorationsIt != decorations.end() &&
+				   decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
+				{
+					// https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
+					// Decorating an object with the WorkgroupSize built-in
+					// decoration will make that object contain the dimensions
+					// of a local workgroup. If an object is decorated with the
+					// WorkgroupSize decoration, this must take precedence over
+					// any execution mode set for LocalSize.
+					// The object decorated with WorkgroupSize must be declared
+					// as a three-component vector of 32-bit integers.
+					ASSERT(getType(object.type).sizeInComponents == 3);
+					modes.WorkgroupSizeX = object.constantValue[0];
+					modes.WorkgroupSizeY = object.constantValue[1];
+					modes.WorkgroupSizeZ = object.constantValue[2];
+				}
+				break;
 			}
-			break;
-		}
-		case spv::OpName:
-		case spv::OpMemberName:
-		case spv::OpSource:
-		case spv::OpSourceContinued:
-		case spv::OpSourceExtension:
-		case spv::OpLine:
-		case spv::OpNoLine:
-		case spv::OpModuleProcessed:
-		case spv::OpString:
-			// No semantic impact
-			break;
+			case spv::OpSpecConstantOp:
+				EvalSpecConstantOp(insn);
+				break;
 
-		case spv::OpFunctionParameter:
-			// These should have all been removed by preprocessing passes. If we see them here,
-			// our assumptions are wrong and we will probably generate wrong code.
-			UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
-			break;
+			case spv::OpCapability:
+			{
+				auto capability = static_cast<spv::Capability>(insn.word(1));
+				switch(capability)
+				{
+					case spv::CapabilityMatrix: capabilities.Matrix = true; break;
+					case spv::CapabilityShader: capabilities.Shader = true; break;
+					case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break;
+					case spv::CapabilityCullDistance: capabilities.CullDistance = true; break;
+					case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
+					case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
+					case spv::CapabilityImage1D: capabilities.Image1D = true; break;
+					case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break;
+					case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
+					case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break;
+					case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
+					case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
+					case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
+					case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
+					case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
+					case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
+					case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break;
+					case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
+					case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
+					case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
+					case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
+					case spv::CapabilityMultiView: capabilities.MultiView = true; break;
+					default:
+						UNSUPPORTED("Unsupported capability %u", insn.word(1));
+				}
+				break;  // Various capabilities will be declared, but none affect our code generation at this point.
+			}
 
-		case spv::OpFunctionCall:
-			// TODO(b/141246700): Add full support for spv::OpFunctionCall
-			break;
+			case spv::OpMemoryModel:
+				break;  // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
 
-		case spv::OpFConvert:
-			UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
-			break;
+			case spv::OpFunction:
+			{
+				auto functionId = Function::ID(insn.word(2));
+				ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
+				currentFunction = functionId;
+				auto &function = functions[functionId];
+				function.result = Type::ID(insn.word(1));
+				function.type = Type::ID(insn.word(4));
+				// Scan forward to find the function's label.
+				for(auto it = insn; it != end() && function.entry == 0; it++)
+				{
+					switch(it.opcode())
+					{
+						case spv::OpFunction:
+						case spv::OpFunctionParameter:
+							break;
+						case spv::OpLabel:
+							function.entry = Block::ID(it.word(1));
+							break;
+						default:
+							WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
+					}
+				}
+				ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
+				break;
+			}
 
-		case spv::OpSConvert:
-			UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
-			break;
+			case spv::OpFunctionEnd:
+				currentFunction = 0;
+				break;
 
-		case spv::OpUConvert:
-			UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
-			break;
+			case spv::OpExtInstImport:
+			{
+				// We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
+				// Valid shaders will not attempt to import any other instruction sets.
+				auto ext = insn.string(2);
+				if(0 != strcmp("GLSL.std.450", ext))
+				{
+					UNSUPPORTED("SPIR-V Extension: %s", ext);
+				}
+				break;
+			}
+			case spv::OpName:
+			case spv::OpMemberName:
+			case spv::OpSource:
+			case spv::OpSourceContinued:
+			case spv::OpSourceExtension:
+			case spv::OpLine:
+			case spv::OpNoLine:
+			case spv::OpModuleProcessed:
+			case spv::OpString:
+				// No semantic impact
+				break;
 
-		case spv::OpLoad:
-		case spv::OpAccessChain:
-		case spv::OpInBoundsAccessChain:
-		case spv::OpSampledImage:
-		case spv::OpImage:
+			case spv::OpFunctionParameter:
+				// These should have all been removed by preprocessing passes. If we see them here,
+				// our assumptions are wrong and we will probably generate wrong code.
+				UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
+				break;
+
+			case spv::OpFunctionCall:
+				// TODO(b/141246700): Add full support for spv::OpFunctionCall
+				break;
+
+			case spv::OpFConvert:
+				UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
+				break;
+
+			case spv::OpSConvert:
+				UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
+				break;
+
+			case spv::OpUConvert:
+				UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
+				break;
+
+			case spv::OpLoad:
+			case spv::OpAccessChain:
+			case spv::OpInBoundsAccessChain:
+			case spv::OpSampledImage:
+			case spv::OpImage:
 			{
 				// Propagate the descriptor decorations to the result.
 				Object::ID resultId = insn.word(2);
@@ -484,202 +487,202 @@
 			}
 			break;
 
-		case spv::OpCompositeConstruct:
-		case spv::OpCompositeInsert:
-		case spv::OpCompositeExtract:
-		case spv::OpVectorShuffle:
-		case spv::OpVectorTimesScalar:
-		case spv::OpMatrixTimesScalar:
-		case spv::OpMatrixTimesVector:
-		case spv::OpVectorTimesMatrix:
-		case spv::OpMatrixTimesMatrix:
-		case spv::OpOuterProduct:
-		case spv::OpTranspose:
-		case spv::OpVectorExtractDynamic:
-		case spv::OpVectorInsertDynamic:
-		// Unary ops
-		case spv::OpNot:
-		case spv::OpBitFieldInsert:
-		case spv::OpBitFieldSExtract:
-		case spv::OpBitFieldUExtract:
-		case spv::OpBitReverse:
-		case spv::OpBitCount:
-		case spv::OpSNegate:
-		case spv::OpFNegate:
-		case spv::OpLogicalNot:
-		case spv::OpQuantizeToF16:
-		// Binary ops
-		case spv::OpIAdd:
-		case spv::OpISub:
-		case spv::OpIMul:
-		case spv::OpSDiv:
-		case spv::OpUDiv:
-		case spv::OpFAdd:
-		case spv::OpFSub:
-		case spv::OpFMul:
-		case spv::OpFDiv:
-		case spv::OpFMod:
-		case spv::OpFRem:
-		case spv::OpFOrdEqual:
-		case spv::OpFUnordEqual:
-		case spv::OpFOrdNotEqual:
-		case spv::OpFUnordNotEqual:
-		case spv::OpFOrdLessThan:
-		case spv::OpFUnordLessThan:
-		case spv::OpFOrdGreaterThan:
-		case spv::OpFUnordGreaterThan:
-		case spv::OpFOrdLessThanEqual:
-		case spv::OpFUnordLessThanEqual:
-		case spv::OpFOrdGreaterThanEqual:
-		case spv::OpFUnordGreaterThanEqual:
-		case spv::OpSMod:
-		case spv::OpSRem:
-		case spv::OpUMod:
-		case spv::OpIEqual:
-		case spv::OpINotEqual:
-		case spv::OpUGreaterThan:
-		case spv::OpSGreaterThan:
-		case spv::OpUGreaterThanEqual:
-		case spv::OpSGreaterThanEqual:
-		case spv::OpULessThan:
-		case spv::OpSLessThan:
-		case spv::OpULessThanEqual:
-		case spv::OpSLessThanEqual:
-		case spv::OpShiftRightLogical:
-		case spv::OpShiftRightArithmetic:
-		case spv::OpShiftLeftLogical:
-		case spv::OpBitwiseOr:
-		case spv::OpBitwiseXor:
-		case spv::OpBitwiseAnd:
-		case spv::OpLogicalOr:
-		case spv::OpLogicalAnd:
-		case spv::OpLogicalEqual:
-		case spv::OpLogicalNotEqual:
-		case spv::OpUMulExtended:
-		case spv::OpSMulExtended:
-		case spv::OpIAddCarry:
-		case spv::OpISubBorrow:
-		case spv::OpDot:
-		case spv::OpConvertFToU:
-		case spv::OpConvertFToS:
-		case spv::OpConvertSToF:
-		case spv::OpConvertUToF:
-		case spv::OpBitcast:
-		case spv::OpSelect:
-		case spv::OpExtInst:
-		case spv::OpIsInf:
-		case spv::OpIsNan:
-		case spv::OpAny:
-		case spv::OpAll:
-		case spv::OpDPdx:
-		case spv::OpDPdxCoarse:
-		case spv::OpDPdy:
-		case spv::OpDPdyCoarse:
-		case spv::OpFwidth:
-		case spv::OpFwidthCoarse:
-		case spv::OpDPdxFine:
-		case spv::OpDPdyFine:
-		case spv::OpFwidthFine:
-		case spv::OpAtomicLoad:
-		case spv::OpAtomicIAdd:
-		case spv::OpAtomicISub:
-		case spv::OpAtomicSMin:
-		case spv::OpAtomicSMax:
-		case spv::OpAtomicUMin:
-		case spv::OpAtomicUMax:
-		case spv::OpAtomicAnd:
-		case spv::OpAtomicOr:
-		case spv::OpAtomicXor:
-		case spv::OpAtomicIIncrement:
-		case spv::OpAtomicIDecrement:
-		case spv::OpAtomicExchange:
-		case spv::OpAtomicCompareExchange:
-		case spv::OpPhi:
-		case spv::OpImageSampleImplicitLod:
-		case spv::OpImageSampleExplicitLod:
-		case spv::OpImageSampleDrefImplicitLod:
-		case spv::OpImageSampleDrefExplicitLod:
-		case spv::OpImageSampleProjImplicitLod:
-		case spv::OpImageSampleProjExplicitLod:
-		case spv::OpImageSampleProjDrefImplicitLod:
-		case spv::OpImageSampleProjDrefExplicitLod:
-		case spv::OpImageGather:
-		case spv::OpImageDrefGather:
-		case spv::OpImageFetch:
-		case spv::OpImageQuerySizeLod:
-		case spv::OpImageQuerySize:
-		case spv::OpImageQueryLod:
-		case spv::OpImageQueryLevels:
-		case spv::OpImageQuerySamples:
-		case spv::OpImageRead:
-		case spv::OpImageTexelPointer:
-		case spv::OpGroupNonUniformElect:
-		case spv::OpGroupNonUniformAll:
-		case spv::OpGroupNonUniformAny:
-		case spv::OpGroupNonUniformAllEqual:
-		case spv::OpGroupNonUniformBroadcast:
-		case spv::OpGroupNonUniformBroadcastFirst:
-		case spv::OpGroupNonUniformBallot:
-		case spv::OpGroupNonUniformInverseBallot:
-		case spv::OpGroupNonUniformBallotBitExtract:
-		case spv::OpGroupNonUniformBallotBitCount:
-		case spv::OpGroupNonUniformBallotFindLSB:
-		case spv::OpGroupNonUniformBallotFindMSB:
-		case spv::OpGroupNonUniformShuffle:
-		case spv::OpGroupNonUniformShuffleXor:
-		case spv::OpGroupNonUniformShuffleUp:
-		case spv::OpGroupNonUniformShuffleDown:
-		case spv::OpGroupNonUniformIAdd:
-		case spv::OpGroupNonUniformFAdd:
-		case spv::OpGroupNonUniformIMul:
-		case spv::OpGroupNonUniformFMul:
-		case spv::OpGroupNonUniformSMin:
-		case spv::OpGroupNonUniformUMin:
-		case spv::OpGroupNonUniformFMin:
-		case spv::OpGroupNonUniformSMax:
-		case spv::OpGroupNonUniformUMax:
-		case spv::OpGroupNonUniformFMax:
-		case spv::OpGroupNonUniformBitwiseAnd:
-		case spv::OpGroupNonUniformBitwiseOr:
-		case spv::OpGroupNonUniformBitwiseXor:
-		case spv::OpGroupNonUniformLogicalAnd:
-		case spv::OpGroupNonUniformLogicalOr:
-		case spv::OpGroupNonUniformLogicalXor:
-		case spv::OpCopyObject:
-		case spv::OpArrayLength:
-			// Instructions that yield an intermediate value or divergent pointer
-			DefineResult(insn);
-			break;
+			case spv::OpCompositeConstruct:
+			case spv::OpCompositeInsert:
+			case spv::OpCompositeExtract:
+			case spv::OpVectorShuffle:
+			case spv::OpVectorTimesScalar:
+			case spv::OpMatrixTimesScalar:
+			case spv::OpMatrixTimesVector:
+			case spv::OpVectorTimesMatrix:
+			case spv::OpMatrixTimesMatrix:
+			case spv::OpOuterProduct:
+			case spv::OpTranspose:
+			case spv::OpVectorExtractDynamic:
+			case spv::OpVectorInsertDynamic:
+			// Unary ops
+			case spv::OpNot:
+			case spv::OpBitFieldInsert:
+			case spv::OpBitFieldSExtract:
+			case spv::OpBitFieldUExtract:
+			case spv::OpBitReverse:
+			case spv::OpBitCount:
+			case spv::OpSNegate:
+			case spv::OpFNegate:
+			case spv::OpLogicalNot:
+			case spv::OpQuantizeToF16:
+			// Binary ops
+			case spv::OpIAdd:
+			case spv::OpISub:
+			case spv::OpIMul:
+			case spv::OpSDiv:
+			case spv::OpUDiv:
+			case spv::OpFAdd:
+			case spv::OpFSub:
+			case spv::OpFMul:
+			case spv::OpFDiv:
+			case spv::OpFMod:
+			case spv::OpFRem:
+			case spv::OpFOrdEqual:
+			case spv::OpFUnordEqual:
+			case spv::OpFOrdNotEqual:
+			case spv::OpFUnordNotEqual:
+			case spv::OpFOrdLessThan:
+			case spv::OpFUnordLessThan:
+			case spv::OpFOrdGreaterThan:
+			case spv::OpFUnordGreaterThan:
+			case spv::OpFOrdLessThanEqual:
+			case spv::OpFUnordLessThanEqual:
+			case spv::OpFOrdGreaterThanEqual:
+			case spv::OpFUnordGreaterThanEqual:
+			case spv::OpSMod:
+			case spv::OpSRem:
+			case spv::OpUMod:
+			case spv::OpIEqual:
+			case spv::OpINotEqual:
+			case spv::OpUGreaterThan:
+			case spv::OpSGreaterThan:
+			case spv::OpUGreaterThanEqual:
+			case spv::OpSGreaterThanEqual:
+			case spv::OpULessThan:
+			case spv::OpSLessThan:
+			case spv::OpULessThanEqual:
+			case spv::OpSLessThanEqual:
+			case spv::OpShiftRightLogical:
+			case spv::OpShiftRightArithmetic:
+			case spv::OpShiftLeftLogical:
+			case spv::OpBitwiseOr:
+			case spv::OpBitwiseXor:
+			case spv::OpBitwiseAnd:
+			case spv::OpLogicalOr:
+			case spv::OpLogicalAnd:
+			case spv::OpLogicalEqual:
+			case spv::OpLogicalNotEqual:
+			case spv::OpUMulExtended:
+			case spv::OpSMulExtended:
+			case spv::OpIAddCarry:
+			case spv::OpISubBorrow:
+			case spv::OpDot:
+			case spv::OpConvertFToU:
+			case spv::OpConvertFToS:
+			case spv::OpConvertSToF:
+			case spv::OpConvertUToF:
+			case spv::OpBitcast:
+			case spv::OpSelect:
+			case spv::OpExtInst:
+			case spv::OpIsInf:
+			case spv::OpIsNan:
+			case spv::OpAny:
+			case spv::OpAll:
+			case spv::OpDPdx:
+			case spv::OpDPdxCoarse:
+			case spv::OpDPdy:
+			case spv::OpDPdyCoarse:
+			case spv::OpFwidth:
+			case spv::OpFwidthCoarse:
+			case spv::OpDPdxFine:
+			case spv::OpDPdyFine:
+			case spv::OpFwidthFine:
+			case spv::OpAtomicLoad:
+			case spv::OpAtomicIAdd:
+			case spv::OpAtomicISub:
+			case spv::OpAtomicSMin:
+			case spv::OpAtomicSMax:
+			case spv::OpAtomicUMin:
+			case spv::OpAtomicUMax:
+			case spv::OpAtomicAnd:
+			case spv::OpAtomicOr:
+			case spv::OpAtomicXor:
+			case spv::OpAtomicIIncrement:
+			case spv::OpAtomicIDecrement:
+			case spv::OpAtomicExchange:
+			case spv::OpAtomicCompareExchange:
+			case spv::OpPhi:
+			case spv::OpImageSampleImplicitLod:
+			case spv::OpImageSampleExplicitLod:
+			case spv::OpImageSampleDrefImplicitLod:
+			case spv::OpImageSampleDrefExplicitLod:
+			case spv::OpImageSampleProjImplicitLod:
+			case spv::OpImageSampleProjExplicitLod:
+			case spv::OpImageSampleProjDrefImplicitLod:
+			case spv::OpImageSampleProjDrefExplicitLod:
+			case spv::OpImageGather:
+			case spv::OpImageDrefGather:
+			case spv::OpImageFetch:
+			case spv::OpImageQuerySizeLod:
+			case spv::OpImageQuerySize:
+			case spv::OpImageQueryLod:
+			case spv::OpImageQueryLevels:
+			case spv::OpImageQuerySamples:
+			case spv::OpImageRead:
+			case spv::OpImageTexelPointer:
+			case spv::OpGroupNonUniformElect:
+			case spv::OpGroupNonUniformAll:
+			case spv::OpGroupNonUniformAny:
+			case spv::OpGroupNonUniformAllEqual:
+			case spv::OpGroupNonUniformBroadcast:
+			case spv::OpGroupNonUniformBroadcastFirst:
+			case spv::OpGroupNonUniformBallot:
+			case spv::OpGroupNonUniformInverseBallot:
+			case spv::OpGroupNonUniformBallotBitExtract:
+			case spv::OpGroupNonUniformBallotBitCount:
+			case spv::OpGroupNonUniformBallotFindLSB:
+			case spv::OpGroupNonUniformBallotFindMSB:
+			case spv::OpGroupNonUniformShuffle:
+			case spv::OpGroupNonUniformShuffleXor:
+			case spv::OpGroupNonUniformShuffleUp:
+			case spv::OpGroupNonUniformShuffleDown:
+			case spv::OpGroupNonUniformIAdd:
+			case spv::OpGroupNonUniformFAdd:
+			case spv::OpGroupNonUniformIMul:
+			case spv::OpGroupNonUniformFMul:
+			case spv::OpGroupNonUniformSMin:
+			case spv::OpGroupNonUniformUMin:
+			case spv::OpGroupNonUniformFMin:
+			case spv::OpGroupNonUniformSMax:
+			case spv::OpGroupNonUniformUMax:
+			case spv::OpGroupNonUniformFMax:
+			case spv::OpGroupNonUniformBitwiseAnd:
+			case spv::OpGroupNonUniformBitwiseOr:
+			case spv::OpGroupNonUniformBitwiseXor:
+			case spv::OpGroupNonUniformLogicalAnd:
+			case spv::OpGroupNonUniformLogicalOr:
+			case spv::OpGroupNonUniformLogicalXor:
+			case spv::OpCopyObject:
+			case spv::OpArrayLength:
+				// Instructions that yield an intermediate value or divergent pointer
+				DefineResult(insn);
+				break;
 
-		case spv::OpStore:
-		case spv::OpAtomicStore:
-		case spv::OpImageWrite:
-		case spv::OpCopyMemory:
-		case spv::OpMemoryBarrier:
-			// Don't need to do anything during analysis pass
-			break;
+			case spv::OpStore:
+			case spv::OpAtomicStore:
+			case spv::OpImageWrite:
+			case spv::OpCopyMemory:
+			case spv::OpMemoryBarrier:
+				// Don't need to do anything during analysis pass
+				break;
 
-		case spv::OpControlBarrier:
-			modes.ContainsControlBarriers = true;
-			break;
+			case spv::OpControlBarrier:
+				modes.ContainsControlBarriers = true;
+				break;
 
-		case spv::OpExtension:
-		{
-			auto ext = insn.string(1);
-			// Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
-			// extension per Appendix A, `Vulkan Environment for SPIR-V`.
-			if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
-			if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
-			if(!strcmp(ext, "SPV_KHR_16bit_storage")) break;
-			if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
-			if(!strcmp(ext, "SPV_KHR_device_group")) break;
-			if(!strcmp(ext, "SPV_KHR_multiview")) break;
-			UNSUPPORTED("SPIR-V Extension: %s", ext);
-			break;
-		}
+			case spv::OpExtension:
+			{
+				auto ext = insn.string(1);
+				// Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
+				// extension per Appendix A, `Vulkan Environment for SPIR-V`.
+				if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
+				if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
+				if(!strcmp(ext, "SPV_KHR_16bit_storage")) break;
+				if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
+				if(!strcmp(ext, "SPV_KHR_device_group")) break;
+				if(!strcmp(ext, "SPV_KHR_multiview")) break;
+				UNSUPPORTED("SPIR-V Extension: %s", ext);
+				break;
+			}
 
-		default:
-			UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
+			default:
+				UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
 		}
 	}
 
@@ -702,45 +705,45 @@
 	// member. All members of such a structure are builtins.
 	switch(insn.opcode())
 	{
-	case spv::OpTypeStruct:
-	{
-		auto d = memberDecorations.find(resultId);
-		if(d != memberDecorations.end())
+		case spv::OpTypeStruct:
 		{
-			for(auto &m : d->second)
+			auto d = memberDecorations.find(resultId);
+			if(d != memberDecorations.end())
 			{
-				if(m.HasBuiltIn)
+				for(auto &m : d->second)
 				{
-					type.isBuiltInBlock = true;
-					break;
+					if(m.HasBuiltIn)
+					{
+						type.isBuiltInBlock = true;
+						break;
+					}
 				}
 			}
+			break;
 		}
-		break;
-	}
-	case spv::OpTypePointer:
-	{
-		Type::ID elementTypeId = insn.word(3);
-		type.element = elementTypeId;
-		type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
-		type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
-		break;
-	}
-	case spv::OpTypeVector:
-	case spv::OpTypeMatrix:
-	case spv::OpTypeArray:
-	case spv::OpTypeRuntimeArray:
-	{
-		Type::ID elementTypeId = insn.word(2);
-		type.element = elementTypeId;
-		break;
-	}
-	default:
-		break;
+		case spv::OpTypePointer:
+		{
+			Type::ID elementTypeId = insn.word(3);
+			type.element = elementTypeId;
+			type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
+			type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
+			break;
+		}
+		case spv::OpTypeVector:
+		case spv::OpTypeMatrix:
+		case spv::OpTypeArray:
+		case spv::OpTypeRuntimeArray:
+		{
+			Type::ID elementTypeId = insn.word(2);
+			type.element = elementTypeId;
+			break;
+		}
+		default:
+			break;
 	}
 }
 
-SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
+SpirvShader::Object &SpirvShader::CreateConstant(InsnIterator insn)
 {
 	Type::ID typeId = insn.word(1);
 	Object::ID resultId = insn.word(2);
@@ -771,7 +774,7 @@
 	{
 		// walk the builtin block, registering each of its members separately.
 		auto m = memberDecorations.find(objectTy.element);
-		ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
+		ASSERT(m != memberDecorations.end());  // otherwise we wouldn't have marked the type chain
 		auto &structType = pointeeTy.definition;
 		auto offset = 0u;
 		auto word = 2u;
@@ -781,7 +784,7 @@
 
 			if(member.HasBuiltIn)
 			{
-				builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
+				builtinInterface[member.BuiltIn] = { resultId, offset, memberType.sizeInComponents };
 			}
 
 			offset += memberType.sizeInComponents;
@@ -793,24 +796,24 @@
 	auto d = decorations.find(resultId);
 	if(d != decorations.end() && d->second.HasBuiltIn)
 	{
-		builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
+		builtinInterface[d->second.BuiltIn] = { resultId, 0, pointeeTy.sizeInComponents };
 	}
 	else
 	{
 		object.kind = Object::Kind::InterfaceVariable;
 		VisitInterface(resultId,
-					   [&userDefinedInterface](Decorations const &d, AttribType type) {
-						   // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
-						   auto scalarSlot = (d.Location << 2) | d.Component;
-						   ASSERT(scalarSlot >= 0 &&
-								  scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
+		               [&userDefinedInterface](Decorations const &d, AttribType type) {
+			               // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
+			               auto scalarSlot = (d.Location << 2) | d.Component;
+			               ASSERT(scalarSlot >= 0 &&
+			                      scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
 
-						   auto &slot = userDefinedInterface[scalarSlot];
-						   slot.Type = type;
-						   slot.Flat = d.Flat;
-						   slot.NoPerspective = d.NoPerspective;
-						   slot.Centroid = d.Centroid;
-					   });
+			               auto &slot = userDefinedInterface[scalarSlot];
+			               slot.Type = type;
+			               slot.Flat = d.Flat;
+			               slot.NoPerspective = d.NoPerspective;
+			               slot.Centroid = d.Centroid;
+		               });
 	}
 }
 
@@ -819,31 +822,31 @@
 	auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
 	switch(mode)
 	{
-	case spv::ExecutionModeEarlyFragmentTests:
-		modes.EarlyFragmentTests = true;
-		break;
-	case spv::ExecutionModeDepthReplacing:
-		modes.DepthReplacing = true;
-		break;
-	case spv::ExecutionModeDepthGreater:
-		modes.DepthGreater = true;
-		break;
-	case spv::ExecutionModeDepthLess:
-		modes.DepthLess = true;
-		break;
-	case spv::ExecutionModeDepthUnchanged:
-		modes.DepthUnchanged = true;
-		break;
-	case spv::ExecutionModeLocalSize:
-		modes.WorkgroupSizeX = insn.word(3);
-		modes.WorkgroupSizeY = insn.word(4);
-		modes.WorkgroupSizeZ = insn.word(5);
-		break;
-	case spv::ExecutionModeOriginUpperLeft:
-		// This is always the case for a Vulkan shader. Do nothing.
-		break;
-	default:
-		UNREACHABLE("Execution mode: %d", int(mode));
+		case spv::ExecutionModeEarlyFragmentTests:
+			modes.EarlyFragmentTests = true;
+			break;
+		case spv::ExecutionModeDepthReplacing:
+			modes.DepthReplacing = true;
+			break;
+		case spv::ExecutionModeDepthGreater:
+			modes.DepthGreater = true;
+			break;
+		case spv::ExecutionModeDepthLess:
+			modes.DepthLess = true;
+			break;
+		case spv::ExecutionModeDepthUnchanged:
+			modes.DepthUnchanged = true;
+			break;
+		case spv::ExecutionModeLocalSize:
+			modes.WorkgroupSizeX = insn.word(3);
+			modes.WorkgroupSizeY = insn.word(4);
+			modes.WorkgroupSizeZ = insn.word(5);
+			break;
+		case spv::ExecutionModeOriginUpperLeft:
+			// This is always the case for a Vulkan shader. Do nothing.
+			break;
+		default:
+			UNREACHABLE("Execution mode: %d", int(mode));
 	}
 }
 
@@ -854,54 +857,54 @@
 	// already been described (and so their sizes determined)
 	switch(insn.opcode())
 	{
-	case spv::OpTypeVoid:
-	case spv::OpTypeSampler:
-	case spv::OpTypeImage:
-	case spv::OpTypeSampledImage:
-	case spv::OpTypeFunction:
-	case spv::OpTypeRuntimeArray:
-		// Objects that don't consume any space.
-		// Descriptor-backed objects currently only need exist at compile-time.
-		// Runtime arrays don't appear in places where their size would be interesting
-		return 0;
+		case spv::OpTypeVoid:
+		case spv::OpTypeSampler:
+		case spv::OpTypeImage:
+		case spv::OpTypeSampledImage:
+		case spv::OpTypeFunction:
+		case spv::OpTypeRuntimeArray:
+			// Objects that don't consume any space.
+			// Descriptor-backed objects currently only need to exist at compile-time.
+			// Runtime arrays don't appear in places where their size would be interesting.
+			return 0;
 
-	case spv::OpTypeBool:
-	case spv::OpTypeFloat:
-	case spv::OpTypeInt:
-		// All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
-		// we might need to change this, but only 32 bit components are required for Vulkan 1.1.
-		return 1;
+		case spv::OpTypeBool:
+		case spv::OpTypeFloat:
+		case spv::OpTypeInt:
+			// All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
+			// we might need to change this, but only 32 bit components are required for Vulkan 1.1.
+			return 1;
 
-	case spv::OpTypeVector:
-	case spv::OpTypeMatrix:
-		// Vectors and matrices both consume element count * element size.
-		return getType(insn.word(2)).sizeInComponents * insn.word(3);
+		case spv::OpTypeVector:
+		case spv::OpTypeMatrix:
+			// Vectors and matrices both consume element count * element size.
+			return getType(insn.word(2)).sizeInComponents * insn.word(3);
 
-	case spv::OpTypeArray:
-	{
-		// Element count * element size. Array sizes come from constant ids.
-		auto arraySize = GetConstScalarInt(insn.word(3));
-		return getType(insn.word(2)).sizeInComponents * arraySize;
-	}
-
-	case spv::OpTypeStruct:
-	{
-		uint32_t size = 0;
-		for(uint32_t i = 2u; i < insn.wordCount(); i++)
+		case spv::OpTypeArray:
 		{
-			size += getType(insn.word(i)).sizeInComponents;
+			// Element count * element size. Array sizes come from constant ids.
+			auto arraySize = GetConstScalarInt(insn.word(3));
+			return getType(insn.word(2)).sizeInComponents * arraySize;
 		}
-		return size;
-	}
 
-	case spv::OpTypePointer:
-		// Runtime representation of a pointer is a per-lane index.
-		// Note: clients are expected to look through the pointer if they want the pointee size instead.
-		return 1;
+		case spv::OpTypeStruct:
+		{
+			uint32_t size = 0;
+			for(uint32_t i = 2u; i < insn.wordCount(); i++)
+			{
+				size += getType(insn.word(i)).sizeInComponents;
+			}
+			return size;
+		}
 
-	default:
-		UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
-		return 0;
+		case spv::OpTypePointer:
+			// Runtime representation of a pointer is a per-lane index.
+			// Note: clients are expected to look through the pointer if they want the pointee size instead.
+			return 1;
+
+		default:
+			UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+			return 0;
 	}
 }
 
@@ -924,54 +927,54 @@
 	auto const &obj = getType(id);
 	switch(obj.opcode())
 	{
-	case spv::OpTypePointer:
-		return VisitInterfaceInner(obj.definition.word(3), d, f);
-	case spv::OpTypeMatrix:
-		for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
+		case spv::OpTypePointer:
+			return VisitInterfaceInner(obj.definition.word(3), d, f);
+		case spv::OpTypeMatrix:
+			for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
+			{
+				// consumes same components of N consecutive locations
+				VisitInterfaceInner(obj.definition.word(2), d, f);
+			}
+			return d.Location;
+		case spv::OpTypeVector:
+			for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
+			{
+				// consumes N consecutive components in the same location
+				VisitInterfaceInner(obj.definition.word(2), d, f);
+			}
+			return d.Location + 1;
+		case spv::OpTypeFloat:
+			f(d, ATTRIBTYPE_FLOAT);
+			return d.Location + 1;
+		case spv::OpTypeInt:
+			f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
+			return d.Location + 1;
+		case spv::OpTypeBool:
+			f(d, ATTRIBTYPE_UINT);
+			return d.Location + 1;
+		case spv::OpTypeStruct:
 		{
-			// consumes same components of N consecutive locations
-			VisitInterfaceInner(obj.definition.word(2), d, f);
+			// iterate over members, which may themselves have Location/Component decorations
+			for(auto i = 0u; i < obj.definition.wordCount() - 2; i++)
+			{
+				ApplyDecorationsForIdMember(&d, id, i);
+				d.Location = VisitInterfaceInner(obj.definition.word(i + 2), d, f);
+				d.Component = 0;  // Implicit locations always have component=0
+			}
+			return d.Location;
 		}
-		return d.Location;
-	case spv::OpTypeVector:
-		for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
+		case spv::OpTypeArray:
 		{
-			// consumes N consecutive components in the same location
-			VisitInterfaceInner(obj.definition.word(2), d, f);
+			auto arraySize = GetConstScalarInt(obj.definition.word(3));
+			for(auto i = 0u; i < arraySize; i++)
+			{
+				d.Location = VisitInterfaceInner(obj.definition.word(2), d, f);
+			}
+			return d.Location;
 		}
-		return d.Location + 1;
-	case spv::OpTypeFloat:
-		f(d, ATTRIBTYPE_FLOAT);
-		return d.Location + 1;
-	case spv::OpTypeInt:
-		f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
-		return d.Location + 1;
-	case spv::OpTypeBool:
-		f(d, ATTRIBTYPE_UINT);
-		return d.Location + 1;
-	case spv::OpTypeStruct:
-	{
-		// iterate over members, which may themselves have Location/Component decorations
-		for(auto i = 0u; i < obj.definition.wordCount() - 2; i++)
-		{
-			ApplyDecorationsForIdMember(&d, id, i);
-			d.Location = VisitInterfaceInner(obj.definition.word(i + 2), d, f);
-			d.Component = 0;    // Implicit locations always have component=0
-		}
-		return d.Location;
-	}
-	case spv::OpTypeArray:
-	{
-		auto arraySize = GetConstScalarInt(obj.definition.word(3));
-		for(auto i = 0u; i < arraySize; i++)
-		{
-			d.Location = VisitInterfaceInner(obj.definition.word(2), d, f);
-		}
-		return d.Location;
-	}
-	default:
-		// Intentionally partial; most opcodes do not participate in type hierarchies
-		return 0;
+		default:
+			// Intentionally partial; most opcodes do not participate in type hierarchies
+			return 0;
 	}
 }
 
@@ -996,33 +999,33 @@
 	for(auto i = 0u; i < numIndexes; i++)
 	{
 		ApplyDecorationsForId(d, typeId);
-		auto & type = getType(typeId);
+		auto &type = getType(typeId);
 		switch(type.opcode())
 		{
-		case spv::OpTypeStruct:
-		{
-			int memberIndex = GetConstScalarInt(indexIds[i]);
-			ApplyDecorationsForIdMember(d, typeId, memberIndex);
-			typeId = type.definition.word(2u + memberIndex);
-			break;
-		}
-		case spv::OpTypeArray:
-		case spv::OpTypeRuntimeArray:
-			if(dd->InputAttachmentIndex >= 0)
+			case spv::OpTypeStruct:
 			{
-				dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
+				int memberIndex = GetConstScalarInt(indexIds[i]);
+				ApplyDecorationsForIdMember(d, typeId, memberIndex);
+				typeId = type.definition.word(2u + memberIndex);
+				break;
 			}
-			typeId = type.element;
-			break;
-		case spv::OpTypeVector:
-			typeId = type.element;
-			break;
-		case spv::OpTypeMatrix:
-			typeId = type.element;
-			d->InsideMatrix = true;
-			break;
-		default:
-			UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
+			case spv::OpTypeArray:
+			case spv::OpTypeRuntimeArray:
+				if(dd->InputAttachmentIndex >= 0)
+				{
+					dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
+				}
+				typeId = type.element;
+				break;
+			case spv::OpTypeVector:
+				typeId = type.element;
+				break;
+			case spv::OpTypeMatrix:
+				typeId = type.element;
+				d->InsideMatrix = true;
+				break;
+			default:
+				UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
 		}
 	}
 }
@@ -1057,72 +1060,72 @@
 
 	for(auto i = 0u; i < numIndexes; i++)
 	{
-		auto & type = getType(typeId);
+		auto &type = getType(typeId);
 		ApplyDecorationsForId(&d, typeId);
 
 		switch(type.definition.opcode())
 		{
-		case spv::OpTypeStruct:
-		{
-			int memberIndex = GetConstScalarInt(indexIds[i]);
-			ApplyDecorationsForIdMember(&d, typeId, memberIndex);
-			ASSERT(d.HasOffset);
-			constantOffset += d.Offset;
-			typeId = type.definition.word(2u + memberIndex);
-			break;
-		}
-		case spv::OpTypeArray:
-		case spv::OpTypeRuntimeArray:
-		{
-			// TODO: b/127950082: Check bounds.
-			ASSERT(d.HasArrayStride);
-			auto & obj = getObject(indexIds[i]);
-			if(obj.kind == Object::Kind::Constant)
+			case spv::OpTypeStruct:
 			{
-				constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
+				int memberIndex = GetConstScalarInt(indexIds[i]);
+				ApplyDecorationsForIdMember(&d, typeId, memberIndex);
+				ASSERT(d.HasOffset);
+				constantOffset += d.Offset;
+				typeId = type.definition.word(2u + memberIndex);
+				break;
 			}
-			else
+			case spv::OpTypeArray:
+			case spv::OpTypeRuntimeArray:
 			{
-				ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
+				// TODO: b/127950082: Check bounds.
+				ASSERT(d.HasArrayStride);
+				auto &obj = getObject(indexIds[i]);
+				if(obj.kind == Object::Kind::Constant)
+				{
+					constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
+				}
+				else
+				{
+					ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
+				}
+				typeId = type.element;
+				break;
 			}
-			typeId = type.element;
-			break;
-		}
-		case spv::OpTypeMatrix:
-		{
-			// TODO: b/127950082: Check bounds.
-			ASSERT(d.HasMatrixStride);
-			d.InsideMatrix = true;
-			auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
-			auto & obj = getObject(indexIds[i]);
-			if(obj.kind == Object::Kind::Constant)
+			case spv::OpTypeMatrix:
 			{
-				constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
+				// TODO: b/127950082: Check bounds.
+				ASSERT(d.HasMatrixStride);
+				d.InsideMatrix = true;
+				auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
+				auto &obj = getObject(indexIds[i]);
+				if(obj.kind == Object::Kind::Constant)
+				{
+					constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
+				}
+				else
+				{
+					ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
+				}
+				typeId = type.element;
+				break;
 			}
-			else
+			case spv::OpTypeVector:
 			{
-				ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
+				auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
+				auto &obj = getObject(indexIds[i]);
+				if(obj.kind == Object::Kind::Constant)
+				{
+					constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
+				}
+				else
+				{
+					ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
+				}
+				typeId = type.element;
+				break;
 			}
-			typeId = type.element;
-			break;
-		}
-		case spv::OpTypeVector:
-		{
-			auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
-			auto & obj = getObject(indexIds[i]);
-			if(obj.kind == Object::Kind::Constant)
-			{
-				constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
-			}
-			else
-			{
-				ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
-			}
-			typeId = type.element;
-			break;
-		}
-		default:
-			UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
+			default:
+				UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
 		}
 	}
 
@@ -1143,63 +1146,64 @@
 
 	for(auto i = 0u; i < numIndexes; i++)
 	{
-		auto & type = getType(typeId);
+		auto &type = getType(typeId);
 		switch(type.opcode())
 		{
-		case spv::OpTypeStruct:
-		{
-			int memberIndex = GetConstScalarInt(indexIds[i]);
-			int offsetIntoStruct = 0;
-			for(auto j = 0; j < memberIndex; j++) {
-				auto memberType = type.definition.word(2u + j);
-				offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
-			}
-			constantOffset += offsetIntoStruct;
-			typeId = type.definition.word(2u + memberIndex);
-			break;
-		}
-
-		case spv::OpTypeVector:
-		case spv::OpTypeMatrix:
-		case spv::OpTypeArray:
-		case spv::OpTypeRuntimeArray:
-		{
-			// TODO: b/127950082: Check bounds.
-			if(getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
+			case spv::OpTypeStruct:
 			{
-				// indexing into an array of descriptors.
-				auto &obj = getObject(indexIds[i]);
-				if(obj.kind != Object::Kind::Constant)
+				int memberIndex = GetConstScalarInt(indexIds[i]);
+				int offsetIntoStruct = 0;
+				for(auto j = 0; j < memberIndex; j++)
 				{
-					UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
+					auto memberType = type.definition.word(2u + j);
+					offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
 				}
-
-				auto d = descriptorDecorations.at(baseId);
-				ASSERT(d.DescriptorSet >= 0);
-				ASSERT(d.Binding >= 0);
-				auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
-				auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding));
-				ptr.base += stride * GetConstScalarInt(indexIds[i]);
+				constantOffset += offsetIntoStruct;
+				typeId = type.definition.word(2u + memberIndex);
+				break;
 			}
-			else
+
+			case spv::OpTypeVector:
+			case spv::OpTypeMatrix:
+			case spv::OpTypeArray:
+			case spv::OpTypeRuntimeArray:
 			{
-				auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float));
-				auto & obj = getObject(indexIds[i]);
-				if(obj.kind == Object::Kind::Constant)
+				// TODO: b/127950082: Check bounds.
+				if(getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
 				{
-					ptr += stride * GetConstScalarInt(indexIds[i]);
+					// indexing into an array of descriptors.
+					auto &obj = getObject(indexIds[i]);
+					if(obj.kind != Object::Kind::Constant)
+					{
+						UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
+					}
+
+					auto d = descriptorDecorations.at(baseId);
+					ASSERT(d.DescriptorSet >= 0);
+					ASSERT(d.Binding >= 0);
+					auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
+					auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding));
+					ptr.base += stride * GetConstScalarInt(indexIds[i]);
 				}
 				else
 				{
-					ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
+					auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float));
+					auto &obj = getObject(indexIds[i]);
+					if(obj.kind == Object::Kind::Constant)
+					{
+						ptr += stride * GetConstScalarInt(indexIds[i]);
+					}
+					else
+					{
+						ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
+					}
 				}
+				typeId = type.element;
+				break;
 			}
-			typeId = type.element;
-			break;
-		}
 
-		default:
-			UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
+			default:
+				UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
 		}
 	}
 
@@ -1216,35 +1220,36 @@
 
 	for(auto i = 0u; i < numIndexes; i++)
 	{
-		auto & type = getType(typeId);
+		auto &type = getType(typeId);
 		switch(type.opcode())
 		{
-		case spv::OpTypeStruct:
-		{
-			int memberIndex = indexes[i];
-			int offsetIntoStruct = 0;
-			for(auto j = 0; j < memberIndex; j++) {
-				auto memberType = type.definition.word(2u + j);
-				offsetIntoStruct += getType(memberType).sizeInComponents;
+			case spv::OpTypeStruct:
+			{
+				int memberIndex = indexes[i];
+				int offsetIntoStruct = 0;
+				for(auto j = 0; j < memberIndex; j++)
+				{
+					auto memberType = type.definition.word(2u + j);
+					offsetIntoStruct += getType(memberType).sizeInComponents;
+				}
+				componentOffset += offsetIntoStruct;
+				typeId = type.definition.word(2u + memberIndex);
+				break;
 			}
-			componentOffset += offsetIntoStruct;
-			typeId = type.definition.word(2u + memberIndex);
-			break;
-		}
 
-		case spv::OpTypeVector:
-		case spv::OpTypeMatrix:
-		case spv::OpTypeArray:
-		{
-			auto elementType = type.definition.word(2);
-			auto stride = getType(elementType).sizeInComponents;
-			componentOffset += stride * indexes[i];
-			typeId = elementType;
-			break;
-		}
+			case spv::OpTypeVector:
+			case spv::OpTypeMatrix:
+			case spv::OpTypeArray:
+			{
+				auto elementType = type.definition.word(2);
+				auto stride = getType(elementType).sizeInComponents;
+				componentOffset += stride * indexes[i];
+				typeId = elementType;
+				break;
+			}
 
-		default:
-			UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
+			default:
+				UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
 		}
 	}
 
@@ -1255,58 +1260,58 @@
 {
 	switch(decoration)
 	{
-	case spv::DecorationLocation:
-		HasLocation = true;
-		Location = static_cast<int32_t>(arg);
-		break;
-	case spv::DecorationComponent:
-		HasComponent = true;
-		Component = arg;
-		break;
-	case spv::DecorationBuiltIn:
-		HasBuiltIn = true;
-		BuiltIn = static_cast<spv::BuiltIn>(arg);
-		break;
-	case spv::DecorationFlat:
-		Flat = true;
-		break;
-	case spv::DecorationNoPerspective:
-		NoPerspective = true;
-		break;
-	case spv::DecorationCentroid:
-		Centroid = true;
-		break;
-	case spv::DecorationBlock:
-		Block = true;
-		break;
-	case spv::DecorationBufferBlock:
-		BufferBlock = true;
-		break;
-	case spv::DecorationOffset:
-		HasOffset = true;
-		Offset = static_cast<int32_t>(arg);
-		break;
-	case spv::DecorationArrayStride:
-		HasArrayStride = true;
-		ArrayStride = static_cast<int32_t>(arg);
-		break;
-	case spv::DecorationMatrixStride:
-		HasMatrixStride = true;
-		MatrixStride = static_cast<int32_t>(arg);
-		break;
-	case spv::DecorationRelaxedPrecision:
-		RelaxedPrecision = true;
-		break;
-	case spv::DecorationRowMajor:
-		HasRowMajor = true;
-		RowMajor = true;
-		break;
-	case spv::DecorationColMajor:
-		HasRowMajor = true;
-		RowMajor = false;
-	default:
-		// Intentionally partial, there are many decorations we just don't care about.
-		break;
+		case spv::DecorationLocation:
+			HasLocation = true;
+			Location = static_cast<int32_t>(arg);
+			break;
+		case spv::DecorationComponent:
+			HasComponent = true;
+			Component = arg;
+			break;
+		case spv::DecorationBuiltIn:
+			HasBuiltIn = true;
+			BuiltIn = static_cast<spv::BuiltIn>(arg);
+			break;
+		case spv::DecorationFlat:
+			Flat = true;
+			break;
+		case spv::DecorationNoPerspective:
+			NoPerspective = true;
+			break;
+		case spv::DecorationCentroid:
+			Centroid = true;
+			break;
+		case spv::DecorationBlock:
+			Block = true;
+			break;
+		case spv::DecorationBufferBlock:
+			BufferBlock = true;
+			break;
+		case spv::DecorationOffset:
+			HasOffset = true;
+			Offset = static_cast<int32_t>(arg);
+			break;
+		case spv::DecorationArrayStride:
+			HasArrayStride = true;
+			ArrayStride = static_cast<int32_t>(arg);
+			break;
+		case spv::DecorationMatrixStride:
+			HasMatrixStride = true;
+			MatrixStride = static_cast<int32_t>(arg);
+			break;
+		case spv::DecorationRelaxedPrecision:
+			RelaxedPrecision = true;
+			break;
+		case spv::DecorationRowMajor:
+			HasRowMajor = true;
+			RowMajor = true;
+			break;
+		case spv::DecorationColMajor:
+			HasRowMajor = true;
+			RowMajor = false;
+		default:
+			// Intentionally partial, there are many decorations we just don't care about.
+			break;
 	}
 }
 
@@ -1407,15 +1412,15 @@
 
 	switch(getType(typeId).opcode())
 	{
-	case spv::OpTypePointer:
-	case spv::OpTypeImage:
-	case spv::OpTypeSampledImage:
-	case spv::OpTypeSampler:
-		object.kind = Object::Kind::Pointer;
-		break;
+		case spv::OpTypePointer:
+		case spv::OpTypeImage:
+		case spv::OpTypeSampledImage:
+		case spv::OpTypeSampler:
+			object.kind = Object::Kind::Pointer;
+			break;
 
-	default:
-		object.kind = Object::Kind::Intermediate;
+		default:
+			object.kind = Object::Kind::Intermediate;
 	}
 
 	object.definition = insn;
@@ -1425,27 +1430,27 @@
 {
 	switch(storageClass)
 	{
-	case spv::StorageClassUniform:
-	case spv::StorageClassStorageBuffer:
-		// Buffer resource access. robustBufferAccess feature applies.
-		return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
-		                          : OutOfBoundsBehavior::UndefinedBehavior;
-
-	case spv::StorageClassImage:
-		return OutOfBoundsBehavior::UndefinedValue;  // "The value returned by a read of an invalid texel is undefined"
-
-	case spv::StorageClassInput:
-		if(executionModel == spv::ExecutionModelVertex)
-		{
-			// Vertex attributes follow robustBufferAccess rules.
+		case spv::StorageClassUniform:
+		case spv::StorageClassStorageBuffer:
+			// Buffer resource access. robustBufferAccess feature applies.
 			return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
 			                          : OutOfBoundsBehavior::UndefinedBehavior;
-		}
-		// Fall through to default case.
-	default:
-		// TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
-		// TODO(b/131224163): Optimize cases statically known to be within bounds.
-		return OutOfBoundsBehavior::UndefinedValue;
+
+		case spv::StorageClassImage:
+			return OutOfBoundsBehavior::UndefinedValue;  // "The value returned by a read of an invalid texel is undefined"
+
+		case spv::StorageClassInput:
+			if(executionModel == spv::ExecutionModelVertex)
+			{
+				// Vertex attributes follow robustBufferAccess rules.
+				return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+				                          : OutOfBoundsBehavior::UndefinedBehavior;
+			}
+			// Fall through to default case.
+		default:
+			// TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
+			// TODO(b/131224163): Optimize cases statically known to be within bounds.
+			return OutOfBoundsBehavior::UndefinedValue;
 	}
 
 	return OutOfBoundsBehavior::Nullify;
@@ -1459,48 +1464,48 @@
 	{
 		switch(insn.opcode())
 		{
-		case spv::OpVariable:
-		{
-			Type::ID resultPointerTypeId = insn.word(1);
-			auto resultPointerType = getType(resultPointerTypeId);
-			auto pointeeType = getType(resultPointerType.element);
+			case spv::OpVariable:
+			{
+				Type::ID resultPointerTypeId = insn.word(1);
+				auto resultPointerType = getType(resultPointerTypeId);
+				auto pointeeType = getType(resultPointerType.element);
 
-			if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
+				if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
+				{
+					Object::ID resultId = insn.word(2);
+					routine->createVariable(resultId, pointeeType.sizeInComponents);
+				}
+				break;
+			}
+			case spv::OpPhi:
+			{
+				auto type = getType(insn.word(1));
+				Object::ID resultId = insn.word(2);
+				routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
+				break;
+			}
+
+			case spv::OpImageDrefGather:
+			case spv::OpImageFetch:
+			case spv::OpImageGather:
+			case spv::OpImageQueryLod:
+			case spv::OpImageSampleDrefExplicitLod:
+			case spv::OpImageSampleDrefImplicitLod:
+			case spv::OpImageSampleExplicitLod:
+			case spv::OpImageSampleImplicitLod:
+			case spv::OpImageSampleProjDrefExplicitLod:
+			case spv::OpImageSampleProjDrefImplicitLod:
+			case spv::OpImageSampleProjExplicitLod:
+			case spv::OpImageSampleProjImplicitLod:
 			{
 				Object::ID resultId = insn.word(2);
-				routine->createVariable(resultId, pointeeType.sizeInComponents);
+				routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
+				break;
 			}
-			break;
-		}
-		case spv::OpPhi:
-		{
-			auto type = getType(insn.word(1));
-			Object::ID resultId = insn.word(2);
-			routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
-			break;
-		}
 
-		case spv::OpImageDrefGather:
-		case spv::OpImageFetch:
-		case spv::OpImageGather:
-		case spv::OpImageQueryLod:
-		case spv::OpImageSampleDrefExplicitLod:
-		case spv::OpImageSampleDrefImplicitLod:
-		case spv::OpImageSampleExplicitLod:
-		case spv::OpImageSampleImplicitLod:
-		case spv::OpImageSampleProjDrefExplicitLod:
-		case spv::OpImageSampleProjDrefImplicitLod:
-		case spv::OpImageSampleProjExplicitLod:
-		case spv::OpImageSampleProjImplicitLod:
-		{
-			Object::ID resultId = insn.word(2);
-			routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
-			break;
-		}
-
-		default:
-			// Nothing else produces interface variables, so can all be safely ignored.
-			break;
+			default:
+				// Nothing else produces interface variables, so can all be safely ignored.
+				break;
 		}
 	}
 }
@@ -1531,13 +1536,13 @@
 		auto res = EmitInstruction(insn, state);
 		switch(res)
 		{
-		case EmitResult::Continue:
-			continue;
-		case EmitResult::Terminator:
-			break;
-		default:
-			UNREACHABLE("Unexpected EmitResult %d", int(res));
-			break;
+			case EmitResult::Continue:
+				continue;
+			case EmitResult::Terminator:
+				break;
+			default:
+				UNREACHABLE("Unexpected EmitResult %d", int(res));
+				break;
 		}
 	}
 }
@@ -1548,365 +1553,365 @@
 
 	switch(opcode)
 	{
-	case spv::OpTypeVoid:
-	case spv::OpTypeInt:
-	case spv::OpTypeFloat:
-	case spv::OpTypeBool:
-	case spv::OpTypeVector:
-	case spv::OpTypeArray:
-	case spv::OpTypeRuntimeArray:
-	case spv::OpTypeMatrix:
-	case spv::OpTypeStruct:
-	case spv::OpTypePointer:
-	case spv::OpTypeFunction:
-	case spv::OpTypeImage:
-	case spv::OpTypeSampledImage:
-	case spv::OpTypeSampler:
-	case spv::OpExecutionMode:
-	case spv::OpMemoryModel:
-	case spv::OpFunction:
-	case spv::OpFunctionEnd:
-	case spv::OpConstant:
-	case spv::OpConstantNull:
-	case spv::OpConstantTrue:
-	case spv::OpConstantFalse:
-	case spv::OpConstantComposite:
-	case spv::OpSpecConstant:
-	case spv::OpSpecConstantTrue:
-	case spv::OpSpecConstantFalse:
-	case spv::OpSpecConstantComposite:
-	case spv::OpSpecConstantOp:
-	case spv::OpUndef:
-	case spv::OpExtension:
-	case spv::OpCapability:
-	case spv::OpEntryPoint:
-	case spv::OpExtInstImport:
-	case spv::OpDecorate:
-	case spv::OpMemberDecorate:
-	case spv::OpGroupDecorate:
-	case spv::OpGroupMemberDecorate:
-	case spv::OpDecorationGroup:
-	case spv::OpName:
-	case spv::OpMemberName:
-	case spv::OpSource:
-	case spv::OpSourceContinued:
-	case spv::OpSourceExtension:
-	case spv::OpLine:
-	case spv::OpNoLine:
-	case spv::OpModuleProcessed:
-	case spv::OpString:
-		// Nothing to do at emit time. These are either fully handled at analysis time,
-		// or don't require any work at all.
-		return EmitResult::Continue;
+		case spv::OpTypeVoid:
+		case spv::OpTypeInt:
+		case spv::OpTypeFloat:
+		case spv::OpTypeBool:
+		case spv::OpTypeVector:
+		case spv::OpTypeArray:
+		case spv::OpTypeRuntimeArray:
+		case spv::OpTypeMatrix:
+		case spv::OpTypeStruct:
+		case spv::OpTypePointer:
+		case spv::OpTypeFunction:
+		case spv::OpTypeImage:
+		case spv::OpTypeSampledImage:
+		case spv::OpTypeSampler:
+		case spv::OpExecutionMode:
+		case spv::OpMemoryModel:
+		case spv::OpFunction:
+		case spv::OpFunctionEnd:
+		case spv::OpConstant:
+		case spv::OpConstantNull:
+		case spv::OpConstantTrue:
+		case spv::OpConstantFalse:
+		case spv::OpConstantComposite:
+		case spv::OpSpecConstant:
+		case spv::OpSpecConstantTrue:
+		case spv::OpSpecConstantFalse:
+		case spv::OpSpecConstantComposite:
+		case spv::OpSpecConstantOp:
+		case spv::OpUndef:
+		case spv::OpExtension:
+		case spv::OpCapability:
+		case spv::OpEntryPoint:
+		case spv::OpExtInstImport:
+		case spv::OpDecorate:
+		case spv::OpMemberDecorate:
+		case spv::OpGroupDecorate:
+		case spv::OpGroupMemberDecorate:
+		case spv::OpDecorationGroup:
+		case spv::OpName:
+		case spv::OpMemberName:
+		case spv::OpSource:
+		case spv::OpSourceContinued:
+		case spv::OpSourceExtension:
+		case spv::OpLine:
+		case spv::OpNoLine:
+		case spv::OpModuleProcessed:
+		case spv::OpString:
+			// Nothing to do at emit time. These are either fully handled at analysis time,
+			// or don't require any work at all.
+			return EmitResult::Continue;
 
-	case spv::OpLabel:
-		return EmitResult::Continue;
+		case spv::OpLabel:
+			return EmitResult::Continue;
 
-	case spv::OpVariable:
-		return EmitVariable(insn, state);
+		case spv::OpVariable:
+			return EmitVariable(insn, state);
 
-	case spv::OpLoad:
-	case spv::OpAtomicLoad:
-		return EmitLoad(insn, state);
+		case spv::OpLoad:
+		case spv::OpAtomicLoad:
+			return EmitLoad(insn, state);
 
-	case spv::OpStore:
-	case spv::OpAtomicStore:
-		return EmitStore(insn, state);
+		case spv::OpStore:
+		case spv::OpAtomicStore:
+			return EmitStore(insn, state);
 
-	case spv::OpAtomicIAdd:
-	case spv::OpAtomicISub:
-	case spv::OpAtomicSMin:
-	case spv::OpAtomicSMax:
-	case spv::OpAtomicUMin:
-	case spv::OpAtomicUMax:
-	case spv::OpAtomicAnd:
-	case spv::OpAtomicOr:
-	case spv::OpAtomicXor:
-	case spv::OpAtomicIIncrement:
-	case spv::OpAtomicIDecrement:
-	case spv::OpAtomicExchange:
-		return EmitAtomicOp(insn, state);
+		case spv::OpAtomicIAdd:
+		case spv::OpAtomicISub:
+		case spv::OpAtomicSMin:
+		case spv::OpAtomicSMax:
+		case spv::OpAtomicUMin:
+		case spv::OpAtomicUMax:
+		case spv::OpAtomicAnd:
+		case spv::OpAtomicOr:
+		case spv::OpAtomicXor:
+		case spv::OpAtomicIIncrement:
+		case spv::OpAtomicIDecrement:
+		case spv::OpAtomicExchange:
+			return EmitAtomicOp(insn, state);
 
-	case spv::OpAtomicCompareExchange:
-		return EmitAtomicCompareExchange(insn, state);
+		case spv::OpAtomicCompareExchange:
+			return EmitAtomicCompareExchange(insn, state);
 
-	case spv::OpAccessChain:
-	case spv::OpInBoundsAccessChain:
-		return EmitAccessChain(insn, state);
+		case spv::OpAccessChain:
+		case spv::OpInBoundsAccessChain:
+			return EmitAccessChain(insn, state);
 
-	case spv::OpCompositeConstruct:
-		return EmitCompositeConstruct(insn, state);
+		case spv::OpCompositeConstruct:
+			return EmitCompositeConstruct(insn, state);
 
-	case spv::OpCompositeInsert:
-		return EmitCompositeInsert(insn, state);
+		case spv::OpCompositeInsert:
+			return EmitCompositeInsert(insn, state);
 
-	case spv::OpCompositeExtract:
-		return EmitCompositeExtract(insn, state);
+		case spv::OpCompositeExtract:
+			return EmitCompositeExtract(insn, state);
 
-	case spv::OpVectorShuffle:
-		return EmitVectorShuffle(insn, state);
+		case spv::OpVectorShuffle:
+			return EmitVectorShuffle(insn, state);
 
-	case spv::OpVectorExtractDynamic:
-		return EmitVectorExtractDynamic(insn, state);
+		case spv::OpVectorExtractDynamic:
+			return EmitVectorExtractDynamic(insn, state);
 
-	case spv::OpVectorInsertDynamic:
-		return EmitVectorInsertDynamic(insn, state);
+		case spv::OpVectorInsertDynamic:
+			return EmitVectorInsertDynamic(insn, state);
 
-	case spv::OpVectorTimesScalar:
-	case spv::OpMatrixTimesScalar:
-		return EmitVectorTimesScalar(insn, state);
+		case spv::OpVectorTimesScalar:
+		case spv::OpMatrixTimesScalar:
+			return EmitVectorTimesScalar(insn, state);
 
-	case spv::OpMatrixTimesVector:
-		return EmitMatrixTimesVector(insn, state);
+		case spv::OpMatrixTimesVector:
+			return EmitMatrixTimesVector(insn, state);
 
-	case spv::OpVectorTimesMatrix:
-		return EmitVectorTimesMatrix(insn, state);
+		case spv::OpVectorTimesMatrix:
+			return EmitVectorTimesMatrix(insn, state);
 
-	case spv::OpMatrixTimesMatrix:
-		return EmitMatrixTimesMatrix(insn, state);
+		case spv::OpMatrixTimesMatrix:
+			return EmitMatrixTimesMatrix(insn, state);
 
-	case spv::OpOuterProduct:
-		return EmitOuterProduct(insn, state);
+		case spv::OpOuterProduct:
+			return EmitOuterProduct(insn, state);
 
-	case spv::OpTranspose:
-		return EmitTranspose(insn, state);
+		case spv::OpTranspose:
+			return EmitTranspose(insn, state);
 
-	case spv::OpNot:
-	case spv::OpBitFieldInsert:
-	case spv::OpBitFieldSExtract:
-	case spv::OpBitFieldUExtract:
-	case spv::OpBitReverse:
-	case spv::OpBitCount:
-	case spv::OpSNegate:
-	case spv::OpFNegate:
-	case spv::OpLogicalNot:
-	case spv::OpConvertFToU:
-	case spv::OpConvertFToS:
-	case spv::OpConvertSToF:
-	case spv::OpConvertUToF:
-	case spv::OpBitcast:
-	case spv::OpIsInf:
-	case spv::OpIsNan:
-	case spv::OpDPdx:
-	case spv::OpDPdxCoarse:
-	case spv::OpDPdy:
-	case spv::OpDPdyCoarse:
-	case spv::OpFwidth:
-	case spv::OpFwidthCoarse:
-	case spv::OpDPdxFine:
-	case spv::OpDPdyFine:
-	case spv::OpFwidthFine:
-	case spv::OpQuantizeToF16:
-		return EmitUnaryOp(insn, state);
+		case spv::OpNot:
+		case spv::OpBitFieldInsert:
+		case spv::OpBitFieldSExtract:
+		case spv::OpBitFieldUExtract:
+		case spv::OpBitReverse:
+		case spv::OpBitCount:
+		case spv::OpSNegate:
+		case spv::OpFNegate:
+		case spv::OpLogicalNot:
+		case spv::OpConvertFToU:
+		case spv::OpConvertFToS:
+		case spv::OpConvertSToF:
+		case spv::OpConvertUToF:
+		case spv::OpBitcast:
+		case spv::OpIsInf:
+		case spv::OpIsNan:
+		case spv::OpDPdx:
+		case spv::OpDPdxCoarse:
+		case spv::OpDPdy:
+		case spv::OpDPdyCoarse:
+		case spv::OpFwidth:
+		case spv::OpFwidthCoarse:
+		case spv::OpDPdxFine:
+		case spv::OpDPdyFine:
+		case spv::OpFwidthFine:
+		case spv::OpQuantizeToF16:
+			return EmitUnaryOp(insn, state);
 
-	case spv::OpIAdd:
-	case spv::OpISub:
-	case spv::OpIMul:
-	case spv::OpSDiv:
-	case spv::OpUDiv:
-	case spv::OpFAdd:
-	case spv::OpFSub:
-	case spv::OpFMul:
-	case spv::OpFDiv:
-	case spv::OpFMod:
-	case spv::OpFRem:
-	case spv::OpFOrdEqual:
-	case spv::OpFUnordEqual:
-	case spv::OpFOrdNotEqual:
-	case spv::OpFUnordNotEqual:
-	case spv::OpFOrdLessThan:
-	case spv::OpFUnordLessThan:
-	case spv::OpFOrdGreaterThan:
-	case spv::OpFUnordGreaterThan:
-	case spv::OpFOrdLessThanEqual:
-	case spv::OpFUnordLessThanEqual:
-	case spv::OpFOrdGreaterThanEqual:
-	case spv::OpFUnordGreaterThanEqual:
-	case spv::OpSMod:
-	case spv::OpSRem:
-	case spv::OpUMod:
-	case spv::OpIEqual:
-	case spv::OpINotEqual:
-	case spv::OpUGreaterThan:
-	case spv::OpSGreaterThan:
-	case spv::OpUGreaterThanEqual:
-	case spv::OpSGreaterThanEqual:
-	case spv::OpULessThan:
-	case spv::OpSLessThan:
-	case spv::OpULessThanEqual:
-	case spv::OpSLessThanEqual:
-	case spv::OpShiftRightLogical:
-	case spv::OpShiftRightArithmetic:
-	case spv::OpShiftLeftLogical:
-	case spv::OpBitwiseOr:
-	case spv::OpBitwiseXor:
-	case spv::OpBitwiseAnd:
-	case spv::OpLogicalOr:
-	case spv::OpLogicalAnd:
-	case spv::OpLogicalEqual:
-	case spv::OpLogicalNotEqual:
-	case spv::OpUMulExtended:
-	case spv::OpSMulExtended:
-	case spv::OpIAddCarry:
-	case spv::OpISubBorrow:
-		return EmitBinaryOp(insn, state);
+		case spv::OpIAdd:
+		case spv::OpISub:
+		case spv::OpIMul:
+		case spv::OpSDiv:
+		case spv::OpUDiv:
+		case spv::OpFAdd:
+		case spv::OpFSub:
+		case spv::OpFMul:
+		case spv::OpFDiv:
+		case spv::OpFMod:
+		case spv::OpFRem:
+		case spv::OpFOrdEqual:
+		case spv::OpFUnordEqual:
+		case spv::OpFOrdNotEqual:
+		case spv::OpFUnordNotEqual:
+		case spv::OpFOrdLessThan:
+		case spv::OpFUnordLessThan:
+		case spv::OpFOrdGreaterThan:
+		case spv::OpFUnordGreaterThan:
+		case spv::OpFOrdLessThanEqual:
+		case spv::OpFUnordLessThanEqual:
+		case spv::OpFOrdGreaterThanEqual:
+		case spv::OpFUnordGreaterThanEqual:
+		case spv::OpSMod:
+		case spv::OpSRem:
+		case spv::OpUMod:
+		case spv::OpIEqual:
+		case spv::OpINotEqual:
+		case spv::OpUGreaterThan:
+		case spv::OpSGreaterThan:
+		case spv::OpUGreaterThanEqual:
+		case spv::OpSGreaterThanEqual:
+		case spv::OpULessThan:
+		case spv::OpSLessThan:
+		case spv::OpULessThanEqual:
+		case spv::OpSLessThanEqual:
+		case spv::OpShiftRightLogical:
+		case spv::OpShiftRightArithmetic:
+		case spv::OpShiftLeftLogical:
+		case spv::OpBitwiseOr:
+		case spv::OpBitwiseXor:
+		case spv::OpBitwiseAnd:
+		case spv::OpLogicalOr:
+		case spv::OpLogicalAnd:
+		case spv::OpLogicalEqual:
+		case spv::OpLogicalNotEqual:
+		case spv::OpUMulExtended:
+		case spv::OpSMulExtended:
+		case spv::OpIAddCarry:
+		case spv::OpISubBorrow:
+			return EmitBinaryOp(insn, state);
 
-	case spv::OpDot:
-		return EmitDot(insn, state);
+		case spv::OpDot:
+			return EmitDot(insn, state);
 
-	case spv::OpSelect:
-		return EmitSelect(insn, state);
+		case spv::OpSelect:
+			return EmitSelect(insn, state);
 
-	case spv::OpExtInst:
-		return EmitExtendedInstruction(insn, state);
+		case spv::OpExtInst:
+			return EmitExtendedInstruction(insn, state);
 
-	case spv::OpAny:
-		return EmitAny(insn, state);
+		case spv::OpAny:
+			return EmitAny(insn, state);
 
-	case spv::OpAll:
-		return EmitAll(insn, state);
+		case spv::OpAll:
+			return EmitAll(insn, state);
 
-	case spv::OpBranch:
-		return EmitBranch(insn, state);
+		case spv::OpBranch:
+			return EmitBranch(insn, state);
 
-	case spv::OpPhi:
-		return EmitPhi(insn, state);
+		case spv::OpPhi:
+			return EmitPhi(insn, state);
 
-	case spv::OpSelectionMerge:
-	case spv::OpLoopMerge:
-		return EmitResult::Continue;
+		case spv::OpSelectionMerge:
+		case spv::OpLoopMerge:
+			return EmitResult::Continue;
 
-	case spv::OpBranchConditional:
-		return EmitBranchConditional(insn, state);
+		case spv::OpBranchConditional:
+			return EmitBranchConditional(insn, state);
 
-	case spv::OpSwitch:
-		return EmitSwitch(insn, state);
+		case spv::OpSwitch:
+			return EmitSwitch(insn, state);
 
-	case spv::OpUnreachable:
-		return EmitUnreachable(insn, state);
+		case spv::OpUnreachable:
+			return EmitUnreachable(insn, state);
 
-	case spv::OpReturn:
-		return EmitReturn(insn, state);
+		case spv::OpReturn:
+			return EmitReturn(insn, state);
 
-	case spv::OpFunctionCall:
-		return EmitFunctionCall(insn, state);
+		case spv::OpFunctionCall:
+			return EmitFunctionCall(insn, state);
 
-	case spv::OpKill:
-		return EmitKill(insn, state);
+		case spv::OpKill:
+			return EmitKill(insn, state);
 
-	case spv::OpImageSampleImplicitLod:
-		return EmitImageSampleImplicitLod(None, insn, state);
+		case spv::OpImageSampleImplicitLod:
+			return EmitImageSampleImplicitLod(None, insn, state);
 
-	case spv::OpImageSampleExplicitLod:
-		return EmitImageSampleExplicitLod(None, insn, state);
+		case spv::OpImageSampleExplicitLod:
+			return EmitImageSampleExplicitLod(None, insn, state);
 
-	case spv::OpImageSampleDrefImplicitLod:
-		return EmitImageSampleImplicitLod(Dref, insn, state);
+		case spv::OpImageSampleDrefImplicitLod:
+			return EmitImageSampleImplicitLod(Dref, insn, state);
 
-	case spv::OpImageSampleDrefExplicitLod:
-		return EmitImageSampleExplicitLod(Dref, insn, state);
+		case spv::OpImageSampleDrefExplicitLod:
+			return EmitImageSampleExplicitLod(Dref, insn, state);
 
-	case spv::OpImageSampleProjImplicitLod:
-		return EmitImageSampleImplicitLod(Proj, insn, state);
+		case spv::OpImageSampleProjImplicitLod:
+			return EmitImageSampleImplicitLod(Proj, insn, state);
 
-	case spv::OpImageSampleProjExplicitLod:
-		return EmitImageSampleExplicitLod(Proj, insn, state);
+		case spv::OpImageSampleProjExplicitLod:
+			return EmitImageSampleExplicitLod(Proj, insn, state);
 
-	case spv::OpImageSampleProjDrefImplicitLod:
-		return EmitImageSampleImplicitLod(ProjDref, insn, state);
+		case spv::OpImageSampleProjDrefImplicitLod:
+			return EmitImageSampleImplicitLod(ProjDref, insn, state);
 
-	case spv::OpImageSampleProjDrefExplicitLod:
-		return EmitImageSampleExplicitLod(ProjDref, insn, state);
+		case spv::OpImageSampleProjDrefExplicitLod:
+			return EmitImageSampleExplicitLod(ProjDref, insn, state);
 
-	case spv::OpImageGather:
-		return EmitImageGather(None, insn, state);
+		case spv::OpImageGather:
+			return EmitImageGather(None, insn, state);
 
-	case spv::OpImageDrefGather:
-		return EmitImageGather(Dref, insn, state);
+		case spv::OpImageDrefGather:
+			return EmitImageGather(Dref, insn, state);
 
-	case spv::OpImageFetch:
-		return EmitImageFetch(insn, state);
+		case spv::OpImageFetch:
+			return EmitImageFetch(insn, state);
 
-	case spv::OpImageQuerySizeLod:
-		return EmitImageQuerySizeLod(insn, state);
+		case spv::OpImageQuerySizeLod:
+			return EmitImageQuerySizeLod(insn, state);
 
-	case spv::OpImageQuerySize:
-		return EmitImageQuerySize(insn, state);
+		case spv::OpImageQuerySize:
+			return EmitImageQuerySize(insn, state);
 
-	case spv::OpImageQueryLod:
-		return EmitImageQueryLod(insn, state);
+		case spv::OpImageQueryLod:
+			return EmitImageQueryLod(insn, state);
 
-	case spv::OpImageQueryLevels:
-		return EmitImageQueryLevels(insn, state);
+		case spv::OpImageQueryLevels:
+			return EmitImageQueryLevels(insn, state);
 
-	case spv::OpImageQuerySamples:
-		return EmitImageQuerySamples(insn, state);
+		case spv::OpImageQuerySamples:
+			return EmitImageQuerySamples(insn, state);
 
-	case spv::OpImageRead:
-		return EmitImageRead(insn, state);
+		case spv::OpImageRead:
+			return EmitImageRead(insn, state);
 
-	case spv::OpImageWrite:
-		return EmitImageWrite(insn, state);
+		case spv::OpImageWrite:
+			return EmitImageWrite(insn, state);
 
-	case spv::OpImageTexelPointer:
-		return EmitImageTexelPointer(insn, state);
+		case spv::OpImageTexelPointer:
+			return EmitImageTexelPointer(insn, state);
 
-	case spv::OpSampledImage:
-	case spv::OpImage:
-		return EmitSampledImageCombineOrSplit(insn, state);
+		case spv::OpSampledImage:
+		case spv::OpImage:
+			return EmitSampledImageCombineOrSplit(insn, state);
 
-	case spv::OpCopyObject:
-		return EmitCopyObject(insn, state);
+		case spv::OpCopyObject:
+			return EmitCopyObject(insn, state);
 
-	case spv::OpCopyMemory:
-		return EmitCopyMemory(insn, state);
+		case spv::OpCopyMemory:
+			return EmitCopyMemory(insn, state);
 
-	case spv::OpControlBarrier:
-		return EmitControlBarrier(insn, state);
+		case spv::OpControlBarrier:
+			return EmitControlBarrier(insn, state);
 
-	case spv::OpMemoryBarrier:
-		return EmitMemoryBarrier(insn, state);
+		case spv::OpMemoryBarrier:
+			return EmitMemoryBarrier(insn, state);
 
-	case spv::OpGroupNonUniformElect:
-	case spv::OpGroupNonUniformAll:
-	case spv::OpGroupNonUniformAny:
-	case spv::OpGroupNonUniformAllEqual:
-	case spv::OpGroupNonUniformBroadcast:
-	case spv::OpGroupNonUniformBroadcastFirst:
-	case spv::OpGroupNonUniformBallot:
-	case spv::OpGroupNonUniformInverseBallot:
-	case spv::OpGroupNonUniformBallotBitExtract:
-	case spv::OpGroupNonUniformBallotBitCount:
-	case spv::OpGroupNonUniformBallotFindLSB:
-	case spv::OpGroupNonUniformBallotFindMSB:
-	case spv::OpGroupNonUniformShuffle:
-	case spv::OpGroupNonUniformShuffleXor:
-	case spv::OpGroupNonUniformShuffleUp:
-	case spv::OpGroupNonUniformShuffleDown:
-	case spv::OpGroupNonUniformIAdd:
-	case spv::OpGroupNonUniformFAdd:
-	case spv::OpGroupNonUniformIMul:
-	case spv::OpGroupNonUniformFMul:
-	case spv::OpGroupNonUniformSMin:
-	case spv::OpGroupNonUniformUMin:
-	case spv::OpGroupNonUniformFMin:
-	case spv::OpGroupNonUniformSMax:
-	case spv::OpGroupNonUniformUMax:
-	case spv::OpGroupNonUniformFMax:
-	case spv::OpGroupNonUniformBitwiseAnd:
-	case spv::OpGroupNonUniformBitwiseOr:
-	case spv::OpGroupNonUniformBitwiseXor:
-	case spv::OpGroupNonUniformLogicalAnd:
-	case spv::OpGroupNonUniformLogicalOr:
-	case spv::OpGroupNonUniformLogicalXor:
-		return EmitGroupNonUniform(insn, state);
+		case spv::OpGroupNonUniformElect:
+		case spv::OpGroupNonUniformAll:
+		case spv::OpGroupNonUniformAny:
+		case spv::OpGroupNonUniformAllEqual:
+		case spv::OpGroupNonUniformBroadcast:
+		case spv::OpGroupNonUniformBroadcastFirst:
+		case spv::OpGroupNonUniformBallot:
+		case spv::OpGroupNonUniformInverseBallot:
+		case spv::OpGroupNonUniformBallotBitExtract:
+		case spv::OpGroupNonUniformBallotBitCount:
+		case spv::OpGroupNonUniformBallotFindLSB:
+		case spv::OpGroupNonUniformBallotFindMSB:
+		case spv::OpGroupNonUniformShuffle:
+		case spv::OpGroupNonUniformShuffleXor:
+		case spv::OpGroupNonUniformShuffleUp:
+		case spv::OpGroupNonUniformShuffleDown:
+		case spv::OpGroupNonUniformIAdd:
+		case spv::OpGroupNonUniformFAdd:
+		case spv::OpGroupNonUniformIMul:
+		case spv::OpGroupNonUniformFMul:
+		case spv::OpGroupNonUniformSMin:
+		case spv::OpGroupNonUniformUMin:
+		case spv::OpGroupNonUniformFMin:
+		case spv::OpGroupNonUniformSMax:
+		case spv::OpGroupNonUniformUMax:
+		case spv::OpGroupNonUniformFMax:
+		case spv::OpGroupNonUniformBitwiseAnd:
+		case spv::OpGroupNonUniformBitwiseOr:
+		case spv::OpGroupNonUniformBitwiseXor:
+		case spv::OpGroupNonUniformLogicalAnd:
+		case spv::OpGroupNonUniformLogicalOr:
+		case spv::OpGroupNonUniformLogicalXor:
+			return EmitGroupNonUniform(insn, state);
 
-	case spv::OpArrayLength:
-		return EmitArrayLength(insn, state);
+		case spv::OpArrayLength:
+			return EmitArrayLength(insn, state);
 
-	default:
-		UNREACHABLE("%s", OpcodeName(opcode).c_str());
-		break;
+		default:
+			UNREACHABLE("%s", OpcodeName(opcode).c_str());
+			break;
 	}
 
 	return EmitResult::Continue;
@@ -1948,8 +1953,8 @@
 	for(auto i = 0u; i < insn.wordCount() - 3; i++)
 	{
 		Object::ID srcObjectId = insn.word(3u + i);
-		auto & srcObject = getObject(srcObjectId);
-		auto & srcObjectTy = getType(srcObject.type);
+		auto &srcObject = getObject(srcObjectId);
+		auto &srcObjectTy = getType(srcObject.type);
 		GenericValue srcObjectAccess(this, state, srcObjectId);
 
 		for(auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
@@ -2092,7 +2097,7 @@
 	for(auto i = 0u; i < type.sizeInComponents; i++)
 	{
 		auto sel = cond.Int(condIsScalar ? 0 : i);
-		dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i)));   // TODO: IfThenElse()
+		dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i)));  // TODO: IfThenElse()
 	}
 
 	return EmitResult::Continue;
@@ -2160,41 +2165,41 @@
 			UInt v;
 			switch(insn.opcode())
 			{
-			case spv::OpAtomicIAdd:
-			case spv::OpAtomicIIncrement:
-				v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicISub:
-			case spv::OpAtomicIDecrement:
-				v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicAnd:
-				v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicOr:
-				v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicXor:
-				v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicSMin:
-				v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
-				break;
-			case spv::OpAtomicSMax:
-				v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
-				break;
-			case spv::OpAtomicUMin:
-				v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicUMax:
-				v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			case spv::OpAtomicExchange:
-				v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
-				break;
-			default:
-				UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
-				break;
+				case spv::OpAtomicIAdd:
+				case spv::OpAtomicIIncrement:
+					v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicISub:
+				case spv::OpAtomicIDecrement:
+					v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicAnd:
+					v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicOr:
+					v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicXor:
+					v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicSMin:
+					v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
+					break;
+				case spv::OpAtomicSMax:
+					v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
+					break;
+				case spv::OpAtomicUMin:
+					v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicUMax:
+					v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicExchange:
+					v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+					break;
+				default:
+					UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+					break;
 			}
 			x = Insert(x, v, j);
 		}
@@ -2298,25 +2303,25 @@
 	{
 		switch(insn.opcode())
 		{
-		case spv::OpVariable:
-		{
-			Object::ID resultId = insn.word(2);
-			auto &object = getObject(resultId);
-			auto &objectTy = getType(object.type);
-			if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
+			case spv::OpVariable:
 			{
-				auto &dst = routine->getVariable(resultId);
-				int offset = 0;
-				VisitInterface(resultId,
-							   [&](Decorations const &d, AttribType type) {
-								   auto scalarSlot = d.Location << 2 | d.Component;
-								   routine->outputs[scalarSlot] = dst[offset++];
-							   });
+				Object::ID resultId = insn.word(2);
+				auto &object = getObject(resultId);
+				auto &objectTy = getType(object.type);
+				if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
+				{
+					auto &dst = routine->getVariable(resultId);
+					int offset = 0;
+					VisitInterface(resultId,
+					               [&](Decorations const &d, AttribType type) {
+						               auto scalarSlot = d.Location << 2 | d.Component;
+						               routine->outputs[scalarSlot] = dst[offset++];
+					               });
+				}
+				break;
 			}
-			break;
-		}
-		default:
-			break;
+			default:
+				break;
 		}
 	}
 
@@ -2331,47 +2336,46 @@
 {
 	switch(model)
 	{
-	case spv::ExecutionModelVertex:                 return VK_SHADER_STAGE_VERTEX_BIT;
-	// case spv::ExecutionModelTessellationControl:    return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
-	// case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
-	// case spv::ExecutionModelGeometry:               return VK_SHADER_STAGE_GEOMETRY_BIT;
-	case spv::ExecutionModelFragment:               return VK_SHADER_STAGE_FRAGMENT_BIT;
-	case spv::ExecutionModelGLCompute:              return VK_SHADER_STAGE_COMPUTE_BIT;
-	// case spv::ExecutionModelKernel:                 return VkShaderStageFlagBits(0); // Not supported by vulkan.
-	// case spv::ExecutionModelTaskNV:                 return VK_SHADER_STAGE_TASK_BIT_NV;
-	// case spv::ExecutionModelMeshNV:                 return VK_SHADER_STAGE_MESH_BIT_NV;
-	// case spv::ExecutionModelRayGenerationNV:        return VK_SHADER_STAGE_RAYGEN_BIT_NV;
-	// case spv::ExecutionModelIntersectionNV:         return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
-	// case spv::ExecutionModelAnyHitNV:               return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
-	// case spv::ExecutionModelClosestHitNV:           return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
-	// case spv::ExecutionModelMissNV:                 return VK_SHADER_STAGE_MISS_BIT_NV;
-	// case spv::ExecutionModelCallableNV:             return VK_SHADER_STAGE_CALLABLE_BIT_NV;
-	default:
-		UNSUPPORTED("ExecutionModel: %d", int(model));
-		return VkShaderStageFlagBits(0);
+		case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT;
+		// case spv::ExecutionModelTessellationControl:    return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+		// case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+		// case spv::ExecutionModelGeometry:               return VK_SHADER_STAGE_GEOMETRY_BIT;
+		case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
+		case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
+		// case spv::ExecutionModelKernel:                 return VkShaderStageFlagBits(0); // Not supported by vulkan.
+		// case spv::ExecutionModelTaskNV:                 return VK_SHADER_STAGE_TASK_BIT_NV;
+		// case spv::ExecutionModelMeshNV:                 return VK_SHADER_STAGE_MESH_BIT_NV;
+		// case spv::ExecutionModelRayGenerationNV:        return VK_SHADER_STAGE_RAYGEN_BIT_NV;
+		// case spv::ExecutionModelIntersectionNV:         return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
+		// case spv::ExecutionModelAnyHitNV:               return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
+		// case spv::ExecutionModelClosestHitNV:           return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
+		// case spv::ExecutionModelMissNV:                 return VK_SHADER_STAGE_MISS_BIT_NV;
+		// case spv::ExecutionModelCallableNV:             return VK_SHADER_STAGE_CALLABLE_BIT_NV;
+		default:
+			UNSUPPORTED("ExecutionModel: %d", int(model));
+			return VkShaderStageFlagBits(0);
 	}
 }
 
-SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId) :
-		obj(shader->getObject(objId)),
-		intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr),
-		type(obj.type) {}
+SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId)
+    : obj(shader->getObject(objId))
+    , intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr)
+    , type(obj.type)
+{}
 
-SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
-	pipelineLayout(pipelineLayout)
+SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout)
+    : pipelineLayout(pipelineLayout)
 {
 }
 
 void SpirvRoutine::setImmutableInputBuiltins(SpirvShader const *shader)
 {
-	setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
 	});
 
-	setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 4);
 		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
 		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2379,8 +2383,7 @@
 		value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
 	});
 
-	setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 4);
 		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
 		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2388,8 +2391,7 @@
 		value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
 	});
 
-	setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 4);
 		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
 		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2397,8 +2399,7 @@
 		value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
 	});
 
-	setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 4);
 		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
 		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2406,8 +2407,7 @@
 		value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
 	});
 
-	setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 4);
 		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
 		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2415,8 +2415,7 @@
 		value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
 	});
 
-	setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		// Only a single physical device is supported.
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index e3d3af7..ecc3f5b 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -15,15 +15,15 @@
 #ifndef sw_SpirvShader_hpp
 #define sw_SpirvShader_hpp
 
-#include "ShaderCore.hpp"
 #include "SamplerCore.hpp"
+#include "ShaderCore.hpp"
 #include "SpirvID.hpp"
-#include "System/Types.hpp"
-#include "Vulkan/VkDebug.hpp"
-#include "Vulkan/VkConfig.h"
-#include "Vulkan/VkDescriptorSet.hpp"
 #include "Device/Config.hpp"
 #include "Device/Sampler.hpp"
+#include "System/Types.hpp"
+#include "Vulkan/VkConfig.h"
+#include "Vulkan/VkDebug.hpp"
+#include "Vulkan/VkDescriptorSet.hpp"
 
 #include <spirv/unified1/spirv.hpp>
 
@@ -31,16 +31,16 @@
 #include <atomic>
 #include <cstdint>
 #include <cstring>
+#include <deque>
 #include <functional>
 #include <memory>
-#include <deque>
 #include <string>
 #include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
-#undef Yield // b/127920555
+#undef Yield  // b/127920555
 
 namespace vk {
 
@@ -65,8 +65,11 @@
 class Intermediate
 {
 public:
-	Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size) {
-		memset(scalar, 0, sizeof(rr::Value*) * size);
+	Intermediate(uint32_t size)
+	    : scalar(new rr::Value *[size])
+	    , size(size)
+	{
+		memset(scalar, 0, sizeof(rr::Value *) * size);
 	}
 
 	~Intermediate()
@@ -75,12 +78,12 @@
 	}
 
 	void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); }
-	void move(uint32_t i, RValue<SIMD::Int> &&scalar)   { emplace(i, scalar.value); }
-	void move(uint32_t i, RValue<SIMD::UInt> &&scalar)  { emplace(i, scalar.value); }
+	void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); }
+	void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); }
 
 	void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); }
-	void move(uint32_t i, const RValue<SIMD::Int> &scalar)   { emplace(i, scalar.value); }
-	void move(uint32_t i, const RValue<SIMD::UInt> &scalar)  { emplace(i, scalar.value); }
+	void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); }
+	void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); }
 
 	// Value retrieval functions.
 	RValue<SIMD::Float> Float(uint32_t i) const
@@ -107,8 +110,8 @@
 	// No copy/move construction or assignment
 	Intermediate(Intermediate const &) = delete;
 	Intermediate(Intermediate &&) = delete;
-	Intermediate & operator=(Intermediate const &) = delete;
-	Intermediate & operator=(Intermediate &&) = delete;
+	Intermediate &operator=(Intermediate const &) = delete;
+	Intermediate &operator=(Intermediate &&) = delete;
 
 private:
 	void emplace(uint32_t i, rr::Value *value)
@@ -128,7 +131,7 @@
 	using InsnStore = std::vector<uint32_t>;
 	InsnStore insns;
 
-	using ImageSampler = void(void* texture, void *sampler, void* uvsIn, void* texelOut, void* constants);
+	using ImageSampler = void(void *texture, void *sampler, void *uvsIn, void *texelOut, void *constants);
 
 	enum class YieldResult
 	{
@@ -157,15 +160,15 @@
 			return iter[n];
 		}
 
-		uint32_t const * wordPointer(uint32_t n) const
+		uint32_t const *wordPointer(uint32_t n) const
 		{
 			ASSERT(n < wordCount());
 			return &iter[n];
 		}
 
-		const char* string(uint32_t n) const
+		const char *string(uint32_t n) const
 		{
-			return reinterpret_cast<const char*>(wordPointer(n));
+			return reinterpret_cast<const char *>(wordPointer(n));
 		}
 
 		bool operator==(InsnIterator const &other) const
@@ -191,7 +194,7 @@
 
 		InsnIterator const operator++(int)
 		{
-			InsnIterator ret{*this};
+			InsnIterator ret{ *this };
 			iter += wordCount();
 			return ret;
 		}
@@ -200,7 +203,8 @@
 
 		InsnIterator() = default;
 
-		explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter}
+		explicit InsnIterator(InsnStore::const_iterator iter)
+		    : iter{ iter }
 		{
 		}
 	};
@@ -208,12 +212,12 @@
 	/* range-based-for interface */
 	InsnIterator begin() const
 	{
-		return InsnIterator{insns.cbegin() + 5};
+		return InsnIterator{ insns.cbegin() + 5 };
 	}
 
 	InsnIterator end() const
 	{
-		return InsnIterator{insns.cend()};
+		return InsnIterator{ insns.cend() };
 	}
 
 	class Type
@@ -287,11 +291,11 @@
 			ID from;
 			ID to;
 
-			bool operator == (const Edge& other) const { return from == other.from && to == other.to; }
+			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }
 
 			struct Hash
 			{
-				std::size_t operator()(const Edge& edge) const noexcept
+				std::size_t operator()(const Edge &edge) const noexcept
 				{
 					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
 				}
@@ -299,7 +303,7 @@
 		};
 
 		Block() = default;
-		Block(const Block& other) = default;
+		Block(const Block &other) = default;
 		explicit Block(InsnIterator begin, InsnIterator end);
 
 		/* range-based-for interface */
@@ -308,22 +312,23 @@
 
 		enum Kind
 		{
-			Simple, // OpBranch or other simple terminator.
-			StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional
-			UnstructuredBranchConditional, // OpBranchConditional
-			StructuredSwitch, // OpSelectionMerge + OpSwitch
-			UnstructuredSwitch, // OpSwitch
-			Loop, // OpLoopMerge + [OpBranchConditional | OpBranch]
+			Simple,                         // OpBranch or other simple terminator.
+			StructuredBranchConditional,    // OpSelectionMerge + OpBranchConditional
+			UnstructuredBranchConditional,  // OpBranchConditional
+			StructuredSwitch,               // OpSelectionMerge + OpSwitch
+			UnstructuredSwitch,             // OpSwitch
+			Loop,                           // OpLoopMerge + [OpBranchConditional | OpBranch]
 		};
 
 		Kind kind = Simple;
-		InsnIterator mergeInstruction; // Structured control flow merge instruction.
-		InsnIterator branchInstruction; // Branch instruction.
-		ID mergeBlock; // Structured flow merge block.
-		ID continueTarget; // Loop continue block.
-		Set ins; // Blocks that branch into this block.
-		Set outs; // Blocks that this block branches to.
+		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
+		InsnIterator branchInstruction;  // Branch instruction.
+		ID mergeBlock;                   // Structured flow merge block.
+		ID continueTarget;               // Loop continue block.
+		Set ins;                         // Blocks that branch into this block.
+		Set outs;                        // Blocks that this block branches to.
 		bool isLoopMerge = false;
+
 	private:
 		InsnIterator begin_;
 		InsnIterator end_;
@@ -336,7 +341,7 @@
 
 		// Walks all reachable the blocks starting from id adding them to
 		// reachable.
-		void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const;
+		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;
 
 		// AssignBlockFields() performs the following for all reachable blocks:
 		// * Assigns Block::ins with the identifiers of all blocks that contain
@@ -362,13 +367,14 @@
 			return it->second;
 		}
 
-		Block::ID entry; // function entry point block.
-		HandleMap<Block> blocks; // blocks belonging to this function.
-		Type::ID type; // type of the function.
-		Type::ID result; // return type.
+		Block::ID entry;          // function entry point block.
+		HandleMap<Block> blocks;  // blocks belonging to this function.
+		Type::ID type;            // type of the function.
+		Type::ID result;          // return type.
 	};
 
-	struct TypeOrObject {}; // Dummy struct to represent a Type or Object.
+	struct TypeOrObject
+	{};  // Dummy struct to represent a Type or Object.
 
 	// TypeOrObjectID is an identifier that represents a Type or an Object,
 	// and supports implicit casting to and from Type::ID or Object::ID.
@@ -377,9 +383,15 @@
 	public:
 		using Hash = std::hash<SpirvID<TypeOrObject>>;
 
-		inline TypeOrObjectID(uint32_t id) : SpirvID(id) {}
-		inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {}
-		inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {}
+		inline TypeOrObjectID(uint32_t id)
+		    : SpirvID(id)
+		{}
+		inline TypeOrObjectID(Type::ID id)
+		    : SpirvID(id.value())
+		{}
+		inline TypeOrObjectID(Object::ID id)
+		    : SpirvID(id.value())
+		{}
 		inline operator Type::ID() const { return Type::ID(value()); }
 		inline operator Object::ID() const { return Object::ID(value()); }
 	};
@@ -399,14 +411,16 @@
 	struct ImageInstruction
 	{
 		ImageInstruction(Variant variant, SamplerMethod samplerMethod)
-			: parameters(0)
+		    : parameters(0)
 		{
 			this->variant = variant;
 			this->samplerMethod = samplerMethod;
 		}
 
 		// Unmarshal from raw 32-bit data
-		ImageInstruction(uint32_t parameters) : parameters(parameters) {}
+		ImageInstruction(uint32_t parameters)
+		    : parameters(parameters)
+		{}
 
 		SamplerFunction getSamplerFunction() const
 		{
@@ -432,12 +446,12 @@
 				uint32_t gatherComponent : 2;
 
 				// Parameters are passed to the sampling routine in this order:
-				uint32_t coordinates : 3;       // 1-4 (does not contain projection component)
-			//	uint32_t dref : 1;              // Indicated by Variant::ProjDref|Dref
-			//	uint32_t lodOrBias : 1;         // Indicated by SamplerMethod::Lod|Bias|Fetch
-				uint32_t grad : 2;              // 0-3 components (for each of dx / dy)
-				uint32_t offset : 2;            // 0-3 components
-				uint32_t sample : 1;            // 0-1 scalar integer
+				uint32_t coordinates : 3;  // 1-4 (does not contain projection component)
+				                           //	uint32_t dref : 1;              // Indicated by Variant::ProjDref|Dref
+				                           //	uint32_t lodOrBias : 1;         // Indicated by SamplerMethod::Lod|Bias|Fetch
+				uint32_t grad : 2;         // 0-3 components (for each of dx / dy)
+				uint32_t offset : 2;       // 0-3 components
+				uint32_t sample : 1;       // 0-1 scalar integer
 			};
 
 			uint32_t parameters;
@@ -450,7 +464,7 @@
 	// shader entry point represented by this object.
 	uint64_t getSerialID() const
 	{
-		return  ((uint64_t)entryPoint.value() << 32) | codeSerialID;
+		return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
 	}
 
 	SpirvShader(uint32_t codeSerialID,
@@ -577,7 +591,7 @@
 		bool HasOffset : 1;
 		bool HasArrayStride : 1;
 		bool HasMatrixStride : 1;
-		bool HasRowMajor : 1;		// whether RowMajor bit is valid.
+		bool HasRowMajor : 1;  // whether RowMajor bit is valid.
 
 		bool Flat : 1;
 		bool Centroid : 1;
@@ -585,21 +599,31 @@
 		bool Block : 1;
 		bool BufferBlock : 1;
 		bool RelaxedPrecision : 1;
-		bool RowMajor : 1;			// RowMajor if true; ColMajor if false
-		bool InsideMatrix : 1;		// pseudo-decoration for whether we're inside a matrix.
+		bool RowMajor : 1;      // RowMajor if true; ColMajor if false
+		bool InsideMatrix : 1;  // pseudo-decoration for whether we're inside a matrix.
 
 		Decorations()
-				: Location{-1}, Component{0},
-				  BuiltIn{static_cast<spv::BuiltIn>(-1)},
-				  Offset{-1}, ArrayStride{-1}, MatrixStride{-1},
-				  HasLocation{false}, HasComponent{false},
-				  HasBuiltIn{false}, HasOffset{false},
-				  HasArrayStride{false}, HasMatrixStride{false},
-				  HasRowMajor{false},
-				  Flat{false}, Centroid{false}, NoPerspective{false},
-				  Block{false}, BufferBlock{false},
-				  RelaxedPrecision{false}, RowMajor{false},
-				  InsideMatrix{false}
+		    : Location{ -1 }
+		    , Component{ 0 }
+		    , BuiltIn{ static_cast<spv::BuiltIn>(-1) }
+		    , Offset{ -1 }
+		    , ArrayStride{ -1 }
+		    , MatrixStride{ -1 }
+		    , HasLocation{ false }
+		    , HasComponent{ false }
+		    , HasBuiltIn{ false }
+		    , HasOffset{ false }
+		    , HasArrayStride{ false }
+		    , HasMatrixStride{ false }
+		    , HasRowMajor{ false }
+		    , Flat{ false }
+		    , Centroid{ false }
+		    , NoPerspective{ false }
+		    , Block{ false }
+		    , BufferBlock{ false }
+		    , RelaxedPrecision{ false }
+		    , RowMajor{ false }
+		    , InsideMatrix{ false }
 		{
 		}
 
@@ -642,7 +666,8 @@
 		};
 
 		InterfaceComponent()
-			: Type{ATTRIBTYPE_UNUSED}, DecorationBits{0}
+		    : Type{ ATTRIBTYPE_UNUSED }
+		    , DecorationBits{ 0 }
 		{
 		}
 	};
@@ -673,9 +698,10 @@
 		}
 		// returns the total allocated size in bytes.
 		inline uint32_t size() const { return totalSize; }
+
 	private:
-		uint32_t totalSize = 0; // in bytes
-		std::unordered_map<Object::ID, uint32_t> offsets; // in bytes
+		uint32_t totalSize = 0;                            // in bytes
+		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
 	};
 
 	std::vector<InterfaceComponent> inputs;
@@ -700,7 +726,7 @@
 	Function::ID entryPoint;
 
 	const bool robustBufferAccess = true;
-	spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.
+	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
 
 	// DeclareType creates a Type for the given OpTypeX instruction, storing
 	// it into the types map. It is called from the analysis pass (constructor).
@@ -768,30 +794,30 @@
 
 	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;
 
-	void VisitInterface(Object::ID id, const InterfaceVisitor& v) const;
+	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;
 
-	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor& v) const;
+	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;
 
 	// MemoryElement describes a scalar element within a structure, and is
 	// used by the callback function of VisitMemoryObject().
 	struct MemoryElement
 	{
-		uint32_t index;   // index of the scalar element
-		uint32_t offset;  // offset (in bytes) from the base of the object
-		const Type& type; // element type
+		uint32_t index;    // index of the scalar element
+		uint32_t offset;   // offset (in bytes) from the base of the object
+		const Type &type;  // element type
 	};
 
-	using MemoryVisitor = std::function<void(const MemoryElement&)>;
+	using MemoryVisitor = std::function<void(const MemoryElement &)>;
 
 	// VisitMemoryObject() walks a type tree in an explicitly laid out
 	// storage class, calling the MemoryVisitor for each scalar element
 	// within the
-	void VisitMemoryObject(Object::ID id, const MemoryVisitor& v) const;
+	void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;
 
 	// VisitMemoryObjectInner() is internally called by VisitMemoryObject()
-	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor& v) const;
+	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;
 
-	Object& CreateConstant(InsnIterator it);
+	Object &CreateConstant(InsnIterator it);
 
 	void ProcessInterfaceVariable(Object &object);
 
@@ -800,19 +826,19 @@
 	{
 	public:
 		EmitState(SpirvRoutine *routine,
-				Function::ID function,
-				RValue<SIMD::Int> activeLaneMask,
-				RValue<SIMD::Int> storesAndAtomicsMask,
-				const vk::DescriptorSet::Bindings &descriptorSets,
-				bool robustBufferAccess,
-				spv::ExecutionModel executionModel)
-			: routine(routine),
-			  function(function),
-			  activeLaneMaskValue(activeLaneMask.value),
-			  storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
-			  descriptorSets(descriptorSets),
-			  robustBufferAccess(robustBufferAccess),
-			  executionModel(executionModel)
+		          Function::ID function,
+		          RValue<SIMD::Int> activeLaneMask,
+		          RValue<SIMD::Int> storesAndAtomicsMask,
+		          const vk::DescriptorSet::Bindings &descriptorSets,
+		          bool robustBufferAccess,
+		          spv::ExecutionModel executionModel)
+		    : routine(routine)
+		    , function(function)
+		    , activeLaneMaskValue(activeLaneMask.value)
+		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value)
+		    , descriptorSets(descriptorSets)
+		    , robustBufferAccess(robustBufferAccess)
+		    , executionModel(executionModel)
 		{
 			ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0));  // Must parse OpEntryPoint before emitting.
 		}
@@ -845,12 +871,12 @@
 		// they will be ORed together.
 		void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);
 
-		SpirvRoutine *routine = nullptr; // The current routine being built.
-		Function::ID function; // The current function being built.
-		Block::ID block; // The current block being built.
-		rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
-		rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask.
-		Block::Set visited; // Blocks already built.
+		SpirvRoutine *routine = nullptr;                 // The current routine being built.
+		Function::ID function;                           // The current function being built.
+		Block::ID block;                                 // The current block being built.
+		rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
+		rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
+		Block::Set visited;                              // Blocks already built.
 		std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
 		std::deque<Block::ID> *pending;
 
@@ -858,16 +884,16 @@
 
 		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
 
-		Intermediate& createIntermediate(Object::ID id, uint32_t size)
+		Intermediate &createIntermediate(Object::ID id, uint32_t size)
 		{
 			auto it = intermediates.emplace(std::piecewise_construct,
-					std::forward_as_tuple(id),
-					std::forward_as_tuple(size));
+			                                std::forward_as_tuple(id),
+			                                std::forward_as_tuple(size));
 			ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
 			return it.first->second;
 		}
 
-		Intermediate const& getIntermediate(Object::ID id) const
+		Intermediate const &getIntermediate(Object::ID id) const
 		{
 			auto it = intermediates.find(id);
 			ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
@@ -880,7 +906,7 @@
 			ASSERT_MSG(added, "Pointer %d created twice", id.value());
 		}
 
-		SIMD::Pointer const& getPointer(Object::ID id) const
+		SIMD::Pointer const &getPointer(Object::ID id) const
 		{
 			auto it = pointers.find(id);
 			ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
@@ -898,8 +924,8 @@
 	// EmitResult is an enumerator of result values from the Emit functions.
 	enum class EmitResult
 	{
-		Continue, // No termination instructions.
-		Terminator, // Reached a termination instruction.
+		Continue,    // No termination instructions.
+		Terminator,  // Reached a termination instruction.
 	};
 
 	// Generic wrapper over either per-lane intermediate value, or a constant.
@@ -1058,7 +1084,7 @@
 	EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;
 
 	void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
-	SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
+	SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
 	uint32_t GetConstScalarInt(Object::ID id) const;
 	void EvalSpecConstantOp(InsnIterator insn);
 	void EvalSpecConstantUnaryOp(InsnIterator insn);
@@ -1071,7 +1097,7 @@
 	// StorePhi updates the phi's alloca storage value using the incoming
 	// values from blocks that are both in the OpPhi instruction and in
 	// filter.
-	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const;
+	void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;
 
 	// Emits a rr::Fence for the given MemorySemanticsMask.
 	void Fence(spv::MemorySemanticsMask semantics) const;
@@ -1085,7 +1111,7 @@
 	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);
 
 	// Helper as we often need to take dot products as part of doing other things.
-	SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
+	SIMD::Float Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const;
 
 	// Splits x into a floating-point significand in the range [0.5, 1.0)
 	// and an integral exponent of two, such that:
@@ -1121,21 +1147,21 @@
 		Pointer<Byte> function;
 	};
 
-	vk::PipelineLayout const * const pipelineLayout;
+	vk::PipelineLayout const *const pipelineLayout;
 
 	std::unordered_map<SpirvShader::Object::ID, Variable> variables;
 	std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
-	Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
-	Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
+	Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS };
+	Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS };
 
 	Pointer<Byte> workgroupMemory;
 	Pointer<Pointer<Byte>> descriptorSets;
 	Pointer<Int> descriptorDynamicOffsets;
 	Pointer<Byte> pushConstants;
 	Pointer<Byte> constants;
-	Int killMask = Int{0};
+	Int killMask = Int{ 0 };
 	SIMD::Int windowSpacePosition[2];
-	Int viewID;	// slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
+	Int viewID;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
 
 	void createVariable(SpirvShader::Object::ID id, uint32_t size)
 	{
@@ -1143,7 +1169,7 @@
 		ASSERT_MSG(added, "Variable %d created twice", id.value());
 	}
 
-	Variable& getVariable(SpirvShader::Object::ID id)
+	Variable &getVariable(SpirvShader::Object::ID id)
 	{
 		auto it = variables.find(id);
 		ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value());
@@ -1158,13 +1184,13 @@
 	// uses the input builtin, otherwise the call is a no-op.
 	// F is a function with the signature:
 	// void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	template <typename F>
-	inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f)
+	template<typename F>
+	inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f)
 	{
 		auto it = shader->inputBuiltins.find(id);
 		if(it != shader->inputBuiltins.end())
 		{
-			const auto& builtin = it->second;
+			const auto &builtin = it->second;
 			f(builtin, getVariable(builtin.Id));
 		}
 	}
@@ -1176,7 +1202,6 @@
 	friend class SpirvShader;
 
 	std::unordered_map<SpirvShader::Object::ID, Variable> phis;
-
 };
 
 }  // namespace sw
diff --git a/src/Pipeline/SpirvShaderArithmetic.cpp b/src/Pipeline/SpirvShaderArithmetic.cpp
index 218df0e..f039124 100644
--- a/src/Pipeline/SpirvShaderArithmetic.cpp
+++ b/src/Pipeline/SpirvShaderArithmetic.cpp
@@ -163,153 +163,152 @@
 	{
 		switch(insn.opcode())
 		{
-		case spv::OpNot:
-		case spv::OpLogicalNot:		// logical not == bitwise not due to all-bits boolean representation
-			dst.move(i, ~src.UInt(i));
-			break;
-		case spv::OpBitFieldInsert:
-		{
-			auto insert = GenericValue(this, state, insn.word(4)).UInt(i);
-			auto offset = GenericValue(this, state, insn.word(5)).UInt(0);
-			auto count = GenericValue(this, state, insn.word(6)).UInt(0);
-			auto one = SIMD::UInt(1);
-			auto v = src.UInt(i);
-			auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
-			dst.move(i, (v & ~mask) | ((insert << offset) & mask));
-			break;
-		}
-		case spv::OpBitFieldSExtract:
-		case spv::OpBitFieldUExtract:
-		{
-			auto offset = GenericValue(this, state, insn.word(4)).UInt(0);
-			auto count = GenericValue(this, state, insn.word(5)).UInt(0);
-			auto one = SIMD::UInt(1);
-			auto v = src.UInt(i);
-			SIMD::UInt out = (v >> offset) & Bitmask32(count);
-			if(insn.opcode() == spv::OpBitFieldSExtract)
+			case spv::OpNot:
+			case spv::OpLogicalNot:  // logical not == bitwise not due to all-bits boolean representation
+				dst.move(i, ~src.UInt(i));
+				break;
+			case spv::OpBitFieldInsert:
 			{
-				auto sign = out & NthBit32(count - one);
-				auto sext = ~(sign - one);
-				out |= sext;
+				auto insert = GenericValue(this, state, insn.word(4)).UInt(i);
+				auto offset = GenericValue(this, state, insn.word(5)).UInt(0);
+				auto count = GenericValue(this, state, insn.word(6)).UInt(0);
+				auto one = SIMD::UInt(1);
+				auto v = src.UInt(i);
+				auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
+				dst.move(i, (v & ~mask) | ((insert << offset) & mask));
+				break;
 			}
-			dst.move(i, out);
-			break;
-		}
-		case spv::OpBitReverse:
-		{
-			// TODO: Add an intrinsic to reactor. Even if there isn't a
-			// single vector instruction, there may be target-dependent
-			// ways to make this faster.
-			// https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
-			SIMD::UInt v = src.UInt(i);
-			v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
-			v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
-			v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
-			v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
-			v = (v >> 16) | (v << 16);
-			dst.move(i, v);
-			break;
-		}
-		case spv::OpBitCount:
-			dst.move(i, CountBits(src.UInt(i)));
-			break;
-		case spv::OpSNegate:
-			dst.move(i, -src.Int(i));
-			break;
-		case spv::OpFNegate:
-			dst.move(i, -src.Float(i));
-			break;
-		case spv::OpConvertFToU:
-			dst.move(i, SIMD::UInt(src.Float(i)));
-			break;
-		case spv::OpConvertFToS:
-			dst.move(i, SIMD::Int(src.Float(i)));
-			break;
-		case spv::OpConvertSToF:
-			dst.move(i, SIMD::Float(src.Int(i)));
-			break;
-		case spv::OpConvertUToF:
-			dst.move(i, SIMD::Float(src.UInt(i)));
-			break;
-		case spv::OpBitcast:
-			dst.move(i, src.Float(i));
-			break;
-		case spv::OpIsInf:
-			dst.move(i, IsInf(src.Float(i)));
-			break;
-		case spv::OpIsNan:
-			dst.move(i, IsNan(src.Float(i)));
-			break;
-		case spv::OpDPdx:
-		case spv::OpDPdxCoarse:
-			// Derivative instructions: FS invocations are laid out like so:
-			//    0 1
-			//    2 3
-			static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
-			dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
-			break;
-		case spv::OpDPdy:
-		case spv::OpDPdyCoarse:
-			dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
-			break;
-		case spv::OpFwidth:
-		case spv::OpFwidthCoarse:
-			dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
-						+ Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
-			break;
-		case spv::OpDPdxFine:
-		{
-			auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
-			auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
-			SIMD::Float v = SIMD::Float(firstRow);
-			v = Insert(v, secondRow, 2);
-			v = Insert(v, secondRow, 3);
-			dst.move(i, v);
-			break;
-		}
-		case spv::OpDPdyFine:
-		{
-			auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
-			auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
-			SIMD::Float v = SIMD::Float(firstColumn);
-			v = Insert(v, secondColumn, 1);
-			v = Insert(v, secondColumn, 3);
-			dst.move(i, v);
-			break;
-		}
-		case spv::OpFwidthFine:
-		{
-			auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
-			auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
-			SIMD::Float dpdx = SIMD::Float(firstRow);
-			dpdx = Insert(dpdx, secondRow, 2);
-			dpdx = Insert(dpdx, secondRow, 3);
-			auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
-			auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
-			SIMD::Float dpdy = SIMD::Float(firstColumn);
-			dpdy = Insert(dpdy, secondColumn, 1);
-			dpdy = Insert(dpdy, secondColumn, 3);
-			dst.move(i, Abs(dpdx) + Abs(dpdy));
-			break;
-		}
-		case spv::OpQuantizeToF16:
-		{
-			// Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
-			auto abs = Abs(src.Float(i));
-			auto sign = src.Int(i) & SIMD::Int(0x80000000);
-			auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
-			auto isInf  = CmpGT(abs, SIMD::Float(65504.0f));
-			auto isNaN  = IsNan(abs);
-			auto isInfOrNan = isInf | isNaN;
-			SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
-			v &= ~isZero | SIMD::Int(0x80000000);
-			v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
-			v |= isNaN & SIMD::Int(0x400000);
-			dst.move(i, v);
-			break;
-		}
-		default:
-			UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+			case spv::OpBitFieldSExtract:
+			case spv::OpBitFieldUExtract:
+			{
+				auto offset = GenericValue(this, state, insn.word(4)).UInt(0);
+				auto count = GenericValue(this, state, insn.word(5)).UInt(0);
+				auto one = SIMD::UInt(1);
+				auto v = src.UInt(i);
+				SIMD::UInt out = (v >> offset) & Bitmask32(count);
+				if(insn.opcode() == spv::OpBitFieldSExtract)
+				{
+					auto sign = out & NthBit32(count - one);
+					auto sext = ~(sign - one);
+					out |= sext;
+				}
+				dst.move(i, out);
+				break;
+			}
+			case spv::OpBitReverse:
+			{
+				// TODO: Add an intrinsic to reactor. Even if there isn't a
+				// single vector instruction, there may be target-dependent
+				// ways to make this faster.
+				// https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+				SIMD::UInt v = src.UInt(i);
+				v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
+				v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
+				v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
+				v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
+				v = (v >> 16) | (v << 16);
+				dst.move(i, v);
+				break;
+			}
+			case spv::OpBitCount:
+				dst.move(i, CountBits(src.UInt(i)));
+				break;
+			case spv::OpSNegate:
+				dst.move(i, -src.Int(i));
+				break;
+			case spv::OpFNegate:
+				dst.move(i, -src.Float(i));
+				break;
+			case spv::OpConvertFToU:
+				dst.move(i, SIMD::UInt(src.Float(i)));
+				break;
+			case spv::OpConvertFToS:
+				dst.move(i, SIMD::Int(src.Float(i)));
+				break;
+			case spv::OpConvertSToF:
+				dst.move(i, SIMD::Float(src.Int(i)));
+				break;
+			case spv::OpConvertUToF:
+				dst.move(i, SIMD::Float(src.UInt(i)));
+				break;
+			case spv::OpBitcast:
+				dst.move(i, src.Float(i));
+				break;
+			case spv::OpIsInf:
+				dst.move(i, IsInf(src.Float(i)));
+				break;
+			case spv::OpIsNan:
+				dst.move(i, IsNan(src.Float(i)));
+				break;
+			case spv::OpDPdx:
+			case spv::OpDPdxCoarse:
+				// Derivative instructions: FS invocations are laid out like so:
+				//    0 1
+				//    2 3
+				static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
+				dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
+				break;
+			case spv::OpDPdy:
+			case spv::OpDPdyCoarse:
+				dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
+				break;
+			case spv::OpFwidth:
+			case spv::OpFwidthCoarse:
+				dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)) + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
+				break;
+			case spv::OpDPdxFine:
+			{
+				auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
+				auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
+				SIMD::Float v = SIMD::Float(firstRow);
+				v = Insert(v, secondRow, 2);
+				v = Insert(v, secondRow, 3);
+				dst.move(i, v);
+				break;
+			}
+			case spv::OpDPdyFine:
+			{
+				auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
+				auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
+				SIMD::Float v = SIMD::Float(firstColumn);
+				v = Insert(v, secondColumn, 1);
+				v = Insert(v, secondColumn, 3);
+				dst.move(i, v);
+				break;
+			}
+			case spv::OpFwidthFine:
+			{
+				auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
+				auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
+				SIMD::Float dpdx = SIMD::Float(firstRow);
+				dpdx = Insert(dpdx, secondRow, 2);
+				dpdx = Insert(dpdx, secondRow, 3);
+				auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
+				auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
+				SIMD::Float dpdy = SIMD::Float(firstColumn);
+				dpdy = Insert(dpdy, secondColumn, 1);
+				dpdy = Insert(dpdy, secondColumn, 3);
+				dst.move(i, Abs(dpdx) + Abs(dpdy));
+				break;
+			}
+			case spv::OpQuantizeToF16:
+			{
+				// Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
+				auto abs = Abs(src.Float(i));
+				auto sign = src.Int(i) & SIMD::Int(0x80000000);
+				auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
+				auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
+				auto isNaN = IsNan(abs);
+				auto isInfOrNan = isInf | isNaN;
+				SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
+				v &= ~isZero | SIMD::Int(0x80000000);
+				v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
+				v |= isNaN & SIMD::Int(0x400000);
+				dst.move(i, v);
+				break;
+			}
+			default:
+				UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
 		}
 	}
 
@@ -328,191 +327,191 @@
 	{
 		switch(insn.opcode())
 		{
-		case spv::OpIAdd:
-			dst.move(i, lhs.Int(i) + rhs.Int(i));
-			break;
-		case spv::OpISub:
-			dst.move(i, lhs.Int(i) - rhs.Int(i));
-			break;
-		case spv::OpIMul:
-			dst.move(i, lhs.Int(i) * rhs.Int(i));
-			break;
-		case spv::OpSDiv:
-		{
-			SIMD::Int a = lhs.Int(i);
-			SIMD::Int b = rhs.Int(i);
-			b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
-			a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
-			dst.move(i, a / b);
-			break;
-		}
-		case spv::OpUDiv:
-		{
-			auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
-			dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
-			break;
-		}
-		case spv::OpSRem:
-		{
-			SIMD::Int a = lhs.Int(i);
-			SIMD::Int b = rhs.Int(i);
-			b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
-			a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
-			dst.move(i, a % b);
-			break;
-		}
-		case spv::OpSMod:
-		{
-			SIMD::Int a = lhs.Int(i);
-			SIMD::Int b = rhs.Int(i);
-			b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
-			a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
-			auto mod = a % b;
-			// If a and b have opposite signs, the remainder operation takes
-			// the sign from a but OpSMod is supposed to take the sign of b.
-			// Adding b will ensure that the result has the correct sign and
-			// that it is still congruent to a modulo b.
-			//
-			// See also http://mathforum.org/library/drmath/view/52343.html
-			auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
-			auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
-			dst.move(i, As<SIMD::Float>(fixedMod));
-			break;
-		}
-		case spv::OpUMod:
-		{
-			auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
-			dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
-			break;
-		}
-		case spv::OpIEqual:
-		case spv::OpLogicalEqual:
-			dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpINotEqual:
-		case spv::OpLogicalNotEqual:
-			dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpUGreaterThan:
-			dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
-			break;
-		case spv::OpSGreaterThan:
-			dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpUGreaterThanEqual:
-			dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
-			break;
-		case spv::OpSGreaterThanEqual:
-			dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpULessThan:
-			dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
-			break;
-		case spv::OpSLessThan:
-			dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpULessThanEqual:
-			dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
-			break;
-		case spv::OpSLessThanEqual:
-			dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpFAdd:
-			dst.move(i, lhs.Float(i) + rhs.Float(i));
-			break;
-		case spv::OpFSub:
-			dst.move(i, lhs.Float(i) - rhs.Float(i));
-			break;
-		case spv::OpFMul:
-			dst.move(i, lhs.Float(i) * rhs.Float(i));
-			break;
-		case spv::OpFDiv:
-			dst.move(i, lhs.Float(i) / rhs.Float(i));
-			break;
-		case spv::OpFMod:
-			// TODO(b/126873455): inaccurate for values greater than 2^24
-			dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
-			break;
-		case spv::OpFRem:
-			dst.move(i, lhs.Float(i) % rhs.Float(i));
-			break;
-		case spv::OpFOrdEqual:
-			dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFUnordEqual:
-			dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFOrdNotEqual:
-			dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFUnordNotEqual:
-			dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFOrdLessThan:
-			dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFUnordLessThan:
-			dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFOrdGreaterThan:
-			dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFUnordGreaterThan:
-			dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFOrdLessThanEqual:
-			dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFUnordLessThanEqual:
-			dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFOrdGreaterThanEqual:
-			dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpFUnordGreaterThanEqual:
-			dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
-			break;
-		case spv::OpShiftRightLogical:
-			dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
-			break;
-		case spv::OpShiftRightArithmetic:
-			dst.move(i, lhs.Int(i) >> rhs.Int(i));
-			break;
-		case spv::OpShiftLeftLogical:
-			dst.move(i, lhs.UInt(i) << rhs.UInt(i));
-			break;
-		case spv::OpBitwiseOr:
-		case spv::OpLogicalOr:
-			dst.move(i, lhs.UInt(i) | rhs.UInt(i));
-			break;
-		case spv::OpBitwiseXor:
-			dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
-			break;
-		case spv::OpBitwiseAnd:
-		case spv::OpLogicalAnd:
-			dst.move(i, lhs.UInt(i) & rhs.UInt(i));
-			break;
-		case spv::OpSMulExtended:
-			// Extended ops: result is a structure containing two members of the same type as lhs & rhs.
-			// In our flat view then, component i is the i'th component of the first member;
-			// component i + N is the i'th component of the second member.
-			dst.move(i, lhs.Int(i) * rhs.Int(i));
-			dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
-			break;
-		case spv::OpUMulExtended:
-			dst.move(i, lhs.UInt(i) * rhs.UInt(i));
-			dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
-			break;
-		case spv::OpIAddCarry:
-			dst.move(i, lhs.UInt(i) + rhs.UInt(i));
-			dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
-			break;
-		case spv::OpISubBorrow:
-			dst.move(i, lhs.UInt(i) - rhs.UInt(i));
-			dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
-			break;
-		default:
-			UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+			case spv::OpIAdd:
+				dst.move(i, lhs.Int(i) + rhs.Int(i));
+				break;
+			case spv::OpISub:
+				dst.move(i, lhs.Int(i) - rhs.Int(i));
+				break;
+			case spv::OpIMul:
+				dst.move(i, lhs.Int(i) * rhs.Int(i));
+				break;
+			case spv::OpSDiv:
+			{
+				SIMD::Int a = lhs.Int(i);
+				SIMD::Int b = rhs.Int(i);
+				b = b | CmpEQ(b, SIMD::Int(0));                                       // prevent divide-by-zero
+				a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1)));  // prevent integer overflow
+				dst.move(i, a / b);
+				break;
+			}
+			case spv::OpUDiv:
+			{
+				auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
+				dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
+				break;
+			}
+			case spv::OpSRem:
+			{
+				SIMD::Int a = lhs.Int(i);
+				SIMD::Int b = rhs.Int(i);
+				b = b | CmpEQ(b, SIMD::Int(0));                                       // prevent divide-by-zero
+				a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1)));  // prevent integer overflow
+				dst.move(i, a % b);
+				break;
+			}
+			case spv::OpSMod:
+			{
+				SIMD::Int a = lhs.Int(i);
+				SIMD::Int b = rhs.Int(i);
+				b = b | CmpEQ(b, SIMD::Int(0));                                       // prevent divide-by-zero
+				a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1)));  // prevent integer overflow
+				auto mod = a % b;
+				// If a and b have opposite signs, the remainder operation takes
+				// the sign from a but OpSMod is supposed to take the sign of b.
+				// Adding b will ensure that the result has the correct sign and
+				// that it is still congruent to a modulo b.
+				//
+				// See also http://mathforum.org/library/drmath/view/52343.html
+				auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
+				auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
+				dst.move(i, As<SIMD::Float>(fixedMod));
+				break;
+			}
+			case spv::OpUMod:
+			{
+				auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
+				dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
+				break;
+			}
+			case spv::OpIEqual:
+			case spv::OpLogicalEqual:
+				dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpINotEqual:
+			case spv::OpLogicalNotEqual:
+				dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpUGreaterThan:
+				dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
+				break;
+			case spv::OpSGreaterThan:
+				dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpUGreaterThanEqual:
+				dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
+				break;
+			case spv::OpSGreaterThanEqual:
+				dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpULessThan:
+				dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
+				break;
+			case spv::OpSLessThan:
+				dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpULessThanEqual:
+				dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
+				break;
+			case spv::OpSLessThanEqual:
+				dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpFAdd:
+				dst.move(i, lhs.Float(i) + rhs.Float(i));
+				break;
+			case spv::OpFSub:
+				dst.move(i, lhs.Float(i) - rhs.Float(i));
+				break;
+			case spv::OpFMul:
+				dst.move(i, lhs.Float(i) * rhs.Float(i));
+				break;
+			case spv::OpFDiv:
+				dst.move(i, lhs.Float(i) / rhs.Float(i));
+				break;
+			case spv::OpFMod:
+				// TODO(b/126873455): inaccurate for values greater than 2^24
+				dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
+				break;
+			case spv::OpFRem:
+				dst.move(i, lhs.Float(i) % rhs.Float(i));
+				break;
+			case spv::OpFOrdEqual:
+				dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFUnordEqual:
+				dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFOrdNotEqual:
+				dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFUnordNotEqual:
+				dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFOrdLessThan:
+				dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFUnordLessThan:
+				dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFOrdGreaterThan:
+				dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFUnordGreaterThan:
+				dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFOrdLessThanEqual:
+				dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFUnordLessThanEqual:
+				dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFOrdGreaterThanEqual:
+				dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpFUnordGreaterThanEqual:
+				dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
+				break;
+			case spv::OpShiftRightLogical:
+				dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
+				break;
+			case spv::OpShiftRightArithmetic:
+				dst.move(i, lhs.Int(i) >> rhs.Int(i));
+				break;
+			case spv::OpShiftLeftLogical:
+				dst.move(i, lhs.UInt(i) << rhs.UInt(i));
+				break;
+			case spv::OpBitwiseOr:
+			case spv::OpLogicalOr:
+				dst.move(i, lhs.UInt(i) | rhs.UInt(i));
+				break;
+			case spv::OpBitwiseXor:
+				dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
+				break;
+			case spv::OpBitwiseAnd:
+			case spv::OpLogicalAnd:
+				dst.move(i, lhs.UInt(i) & rhs.UInt(i));
+				break;
+			case spv::OpSMulExtended:
+				// Extended ops: result is a structure containing two members of the same type as lhs & rhs.
+				// In our flat view then, component i is the i'th component of the first member;
+				// component i + N is the i'th component of the second member.
+				dst.move(i, lhs.Int(i) * rhs.Int(i));
+				dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
+				break;
+			case spv::OpUMulExtended:
+				dst.move(i, lhs.UInt(i) * rhs.UInt(i));
+				dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
+				break;
+			case spv::OpIAddCarry:
+				dst.move(i, lhs.UInt(i) + rhs.UInt(i));
+				dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
+				break;
+			case spv::OpISubBorrow:
+				dst.move(i, lhs.UInt(i) - rhs.UInt(i));
+				dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
+				break;
+			default:
+				UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
 		}
 	}
 
@@ -532,7 +531,7 @@
 	return EmitResult::Continue;
 }
 
-SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
+SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const
 {
 	SIMD::Float d = x.Float(0) * y.Float(0);
 
diff --git a/src/Pipeline/SpirvShaderControlFlow.cpp b/src/Pipeline/SpirvShaderControlFlow.cpp
index e7b1fd7..5f7a1b1 100644
--- a/src/Pipeline/SpirvShaderControlFlow.cpp
+++ b/src/Pipeline/SpirvShaderControlFlow.cpp
@@ -14,7 +14,7 @@
 
 #include "SpirvShader.hpp"
 
-#include "Reactor/Coroutine.hpp" // rr::Yield
+#include "Reactor/Coroutine.hpp"  // rr::Yield
 
 #include "ShaderCore.hpp"
 
@@ -24,7 +24,9 @@
 
 namespace sw {
 
-SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
+SpirvShader::Block::Block(InsnIterator begin, InsnIterator end)
+    : begin_(begin)
+    , end_(end)
 {
 	// Default to a Simple, this may change later.
 	kind = Block::Simple;
@@ -111,7 +113,7 @@
 	}
 }
 
-void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
+void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set &reachable) const
 {
 	if(reachable.count(id) == 0)
 	{
@@ -156,8 +158,8 @@
 	auto block = getBlock(blockId);
 	for(auto dep : block.ins)
 	{
-		if(block.kind != Block::Loop ||                 // if not a loop...
-			!ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
+		if(block.kind != Block::Loop ||                  // if not a loop...
+		   !ExistsPath(blockId, dep, block.mergeBlock))  // or a loop and not a loop back edge
 		{
 			f(dep);
 		}
@@ -196,7 +198,7 @@
 
 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
 {
-	auto edge = Block::Edge{from, to};
+	auto edge = Block::Edge{ from, to };
 	auto it = edgeActiveLaneMasks.find(edge);
 	if(it == edgeActiveLaneMasks.end())
 	{
@@ -212,7 +214,7 @@
 
 RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
 {
-	auto edge = Block::Edge{from, to};
+	auto edge = Block::Edge{ from, to };
 	auto it = state->edgeActiveLaneMasks.find(edge);
 	ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
 	return it->second;
@@ -239,8 +241,7 @@
 
 		// Ensure all dependency blocks have been generated.
 		auto depsDone = true;
-		function.ForeachBlockDependency(id, [&](Block::ID dep)
-		{
+		function.ForeachBlockDependency(id, [&](Block::ID dep) {
 			if(state->visited.count(dep) == 0)
 			{
 				state->pending->push_front(dep);
@@ -287,7 +288,7 @@
 
 	if(!state->visited.emplace(blockId).second)
 	{
-		return; // Already generated this block.
+		return;  // Already generated this block.
 	}
 
 	if(blockId != function.entry)
@@ -323,7 +324,7 @@
 
 	if(!state->visited.emplace(blockId).second)
 	{
-		return; // Already emitted this loop.
+		return;  // Already emitted this loop.
 	}
 
 	// Gather all the blocks that make up the loop.
@@ -414,7 +415,7 @@
 	// Add active lanes to the merge lane mask.
 	for(auto in : function.getBlock(mergeBlockId).ins)
 	{
-		auto edge = Block::Edge{in, mergeBlockId};
+		auto edge = Block::Edge{ in, mergeBlockId };
 		auto it = state->edgeActiveLaneMasks.find(edge);
 		if(it != state->edgeActiveLaneMasks.end())
 		{
@@ -563,9 +564,9 @@
 SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
 {
 	auto functionId = Function::ID(insn.word(3));
-	const auto& functionIt = functions.find(functionId);
+	const auto &functionIt = functions.find(functionId);
 	ASSERT(functionIt != functions.end());
-	auto& function = functionIt->second;
+	auto &function = functionIt->second;
 
 	// TODO(b/141246700): Add full support for spv::OpFunctionCall
 	// The only supported function is a single OpKill wrapped in a
@@ -608,15 +609,15 @@
 
 	switch(executionScope)
 	{
-	case spv::ScopeWorkgroup:
-		Yield(YieldResult::ControlBarrier);
-		break;
-	case spv::ScopeSubgroup:
-		break;
-	default:
-		// See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
-		UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
-		break;
+		case spv::ScopeWorkgroup:
+			Yield(YieldResult::ControlBarrier);
+			break;
+		case spv::ScopeSubgroup:
+			break;
+		default:
+			// See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
+			UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
+			break;
 	}
 
 	return EmitResult::Continue;
@@ -654,7 +655,7 @@
 	}
 }
 
-void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
+void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const
 {
 	auto typeId = Type::ID(insn.word(1));
 	auto type = getType(typeId);
@@ -688,7 +689,7 @@
 {
 	if(semantics == spv::MemorySemanticsMaskNone)
 	{
-		return; //no-op
+		return;  //no-op
 	}
 	rr::Fence(MemoryOrder(semantics));
 }
diff --git a/src/Pipeline/SpirvShaderEnumNames.cpp b/src/Pipeline/SpirvShaderEnumNames.cpp
index 34cadd4..39a0bf0 100644
--- a/src/Pipeline/SpirvShaderEnumNames.cpp
+++ b/src/Pipeline/SpirvShaderEnumNames.cpp
@@ -14,15 +14,15 @@
 
 // This file contains code used to aid debugging.
 
-#include <spirv/unified1/spirv.h>
 #include "SpirvShader.hpp"
+#include <spirv/unified1/spirv.h>
 
 // Prototypes for SPIRV-Tools functions that do not have public headers.
 // This is a C++ function, so the name is mangled, and signature changes will
 // result in a linker error instead of runtime signature mismatches.
 
 // Gets the name of an instruction, without the "Op" prefix.
-extern const char* spvOpcodeString(const SpvOp opcode);
+extern const char *spvOpcodeString(const SpvOp opcode);
 
 namespace sw {
 
@@ -31,4 +31,4 @@
 	return spvOpcodeString(static_cast<SpvOp>(op));
 }
 
-} // namespace sw
+}  // namespace sw
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index 50ae6a5..f6aaeca 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -16,12 +16,11 @@
 
 #include "ShaderCore.hpp"
 
-#include <spirv/unified1/spirv.hpp>
 #include <spirv/unified1/GLSL.std.450.h>
+#include <spirv/unified1/spirv.hpp>
 
-namespace
-{
-	constexpr float PI = 3.141592653589793f;
+namespace {
+constexpr float PI = 3.141592653589793f;
 }
 
 namespace sw {
@@ -34,915 +33,925 @@
 
 	switch(extInstIndex)
 	{
-	case GLSLstd450FAbs:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
+		case GLSLstd450FAbs:
 		{
-			dst.move(i, Abs(src.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450SAbs:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Abs(src.Int(i)));
-		}
-		break;
-	}
-	case GLSLstd450Cross:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
-		dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
-		dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
-		break;
-	}
-	case GLSLstd450Floor:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Floor(src.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Trunc:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Trunc(src.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Ceil:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Ceil(src.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Fract:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Frac(src.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Round:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Round(src.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450RoundEven:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto x = Round(src.Float(i));
-			// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
-			dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
-					SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
-		}
-		break;
-	}
-	case GLSLstd450FMin:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450FMax:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450SMin:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
-		}
-		break;
-	}
-	case GLSLstd450SMax:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
-		}
-		break;
-	}
-	case GLSLstd450UMin:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
-		}
-		break;
-	}
-	case GLSLstd450UMax:
-	{
-		auto lhs = GenericValue(this, state, insn.word(5));
-		auto rhs = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
-		}
-		break;
-	}
-	case GLSLstd450Step:
-	{
-		auto edge = GenericValue(this, state, insn.word(5));
-		auto x = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
-		}
-		break;
-	}
-	case GLSLstd450SmoothStep:
-	{
-		auto edge0 = GenericValue(this, state, insn.word(5));
-		auto edge1 = GenericValue(this, state, insn.word(6));
-		auto x = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
-					(edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
-			dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
-		}
-		break;
-	}
-	case GLSLstd450FMix:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto y = GenericValue(this, state, insn.word(6));
-		auto a = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
-		}
-		break;
-	}
-	case GLSLstd450FClamp:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto minVal = GenericValue(this, state, insn.word(6));
-		auto maxVal = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450SClamp:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto minVal = GenericValue(this, state, insn.word(6));
-		auto maxVal = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
-		}
-		break;
-	}
-	case GLSLstd450UClamp:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto minVal = GenericValue(this, state, insn.word(6));
-		auto maxVal = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
-		}
-		break;
-	}
-	case GLSLstd450FSign:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
-			auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
-			dst.move(i, neg | pos);
-		}
-		break;
-	}
-	case GLSLstd450SSign:
-	{
-		auto src = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
-			auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
-			dst.move(i, neg | pos);
-		}
-		break;
-	}
-	case GLSLstd450Reflect:
-	{
-		auto I = GenericValue(this, state, insn.word(5));
-		auto N = GenericValue(this, state, insn.word(6));
-
-		SIMD::Float d = Dot(type.sizeInComponents, I, N);
-
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
-		}
-		break;
-	}
-	case GLSLstd450Refract:
-	{
-		auto I = GenericValue(this, state, insn.word(5));
-		auto N = GenericValue(this, state, insn.word(6));
-		auto eta = GenericValue(this, state, insn.word(7));
-
-		SIMD::Float d = Dot(type.sizeInComponents, I, N);
-		SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
-		SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
-		SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
-
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450FaceForward:
-	{
-		auto N = GenericValue(this, state, insn.word(5));
-		auto I = GenericValue(this, state, insn.word(6));
-		auto Nref = GenericValue(this, state, insn.word(7));
-
-		SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
-		SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
-
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto n = N.Float(i);
-			dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
-		}
-		break;
-	}
-	case GLSLstd450Length:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
-
-		dst.move(0, Sqrt(d));
-		break;
-	}
-	case GLSLstd450Normalize:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
-		SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
-
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, invLength * x.Float(i));
-		}
-		break;
-	}
-	case GLSLstd450Distance:
-	{
-		auto p0 = GenericValue(this, state, insn.word(5));
-		auto p1 = GenericValue(this, state, insn.word(6));
-		auto p0Type = getType(p0.type);
-
-		// sqrt(dot(p0-p1, p0-p1))
-		SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
-
-		for(auto i = 1u; i < p0Type.sizeInComponents; i++)
-		{
-			d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
-		}
-
-		dst.move(0, Sqrt(d));
-		break;
-	}
-	case GLSLstd450Modf:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		auto ptrId = Object::ID(insn.word(6));
-		auto ptrTy = getType(getObject(ptrId).type);
-		auto ptr = GetPointerToData(ptrId, 0, state);
-		bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
-		// TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
-		// to be in bounds even for inactive lanes.
-		// - Clarify the SPIR-V spec.
-		// - Eliminate lane masking and assume interleaving.
-		auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
-
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			SIMD::Float whole, frac;
-			std::tie(whole, frac) = Modf(val.Float(i));
-			dst.move(i, frac);
-			auto p = ptr + (i * sizeof(float));
-			if(interleavedByLane) { p = InterleaveByLane(p); }
-			p.Store(whole, robustness, state->activeLaneMask());
-		}
-		break;
-	}
-	case GLSLstd450ModfStruct:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		auto valTy = getType(val.type);
-
-		for(auto i = 0u; i < valTy.sizeInComponents; i++)
-		{
-			SIMD::Float whole, frac;
-			std::tie(whole, frac) = Modf(val.Float(i));
-			dst.move(i, frac);
-			dst.move(i + valTy.sizeInComponents, whole);
-		}
-		break;
-	}
-	case GLSLstd450PackSnorm4x8:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) |
-					((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) << 8) |
-					((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) << 16) |
-					((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) << 24));
-		break;
-	}
-	case GLSLstd450PackUnorm4x8:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
-					((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
-					((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
-					((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
-		break;
-	}
-	case GLSLstd450PackSnorm2x16:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
-						SIMD::Int(0xFFFF)) |
-					((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
-						SIMD::Int(0xFFFF)) << 16));
-		break;
-	}
-	case GLSLstd450PackUnorm2x16:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
-						SIMD::UInt(0xFFFF)) |
-					((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
-						SIMD::UInt(0xFFFF)) << 16));
-		break;
-	}
-	case GLSLstd450PackHalf2x16:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
-		break;
-	}
-	case GLSLstd450UnpackSnorm4x8:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		break;
-	}
-	case GLSLstd450UnpackUnorm4x8:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
-		break;
-	}
-	case GLSLstd450UnpackSnorm2x16:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		// clamp(f / 32767.0, -1.0, 1.0)
-		dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
-							SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
-							SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		break;
-	}
-	case GLSLstd450UnpackUnorm2x16:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		// f / 65535.0
-		dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
-		dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
-		break;
-	}
-	case GLSLstd450UnpackHalf2x16:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
-		dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
-		break;
-	}
-	case GLSLstd450Fma:
-	{
-		auto a = GenericValue(this, state, insn.word(5));
-		auto b = GenericValue(this, state, insn.word(6));
-		auto c = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Frexp:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		auto ptrId = Object::ID(insn.word(6));
-		auto ptrTy = getType(getObject(ptrId).type);
-		auto ptr = GetPointerToData(ptrId, 0, state);
-		bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
-		// TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
-		// to be in bounds even for inactive lanes.
-		// - Clarify the SPIR-V spec.
-		// - Eliminate lane masking and assume interleaving.
-		auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
-
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			SIMD::Float significand;
-			SIMD::Int exponent;
-			std::tie(significand, exponent) = Frexp(val.Float(i));
-
-			dst.move(i, significand);
-
-			auto p = ptr + (i * sizeof(float));
-			if(interleavedByLane) { p = InterleaveByLane(p); }
-			p.Store(exponent, robustness, state->activeLaneMask());
-		}
-		break;
-	}
-	case GLSLstd450FrexpStruct:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		auto numComponents = getType(val.type).sizeInComponents;
-		for(auto i = 0u; i < numComponents; i++)
-		{
-			auto significandAndExponent = Frexp(val.Float(i));
-			dst.move(i, significandAndExponent.first);
-			dst.move(i + numComponents, significandAndExponent.second);
-		}
-		break;
-	}
-	case GLSLstd450Ldexp:
-	{
-		auto significand = GenericValue(this, state, insn.word(5));
-		auto exponent = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			// Assumes IEEE 754
-			auto in = significand.Float(i);
-			auto significandExponent = Exponent(in);
-			auto combinedExponent = exponent.Int(i) + significandExponent;
-			auto isSignificandZero	 = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
-			auto isSignificandInf	  = SIMD::UInt(IsInf(in));
-			auto isSignificandNaN	  = SIMD::UInt(IsNan(in));
-			auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
-			auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
-			auto isExponentInBounds	= isExponentNotTooSmall & isExponentNotTooLarge;
-
-			SIMD::UInt v;
-			v  = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
-			v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
-			v &= isExponentInBounds; // Clear v if the exponent is OOB.
-
-			v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
-			v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
-
-			// If the input significand is zero, inf or nan, just return the
-			// input significand.
-			auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
-			v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
-
-			dst.move(i, As<SIMD::Float>(v));
-		}
-		break;
-	}
-	case GLSLstd450Radians:
-	{
-		auto degrees = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
-		}
-		break;
-	}
-	case GLSLstd450Degrees:
-	{
-		auto radians = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
-		}
-		break;
-	}
-	case GLSLstd450Sin:
-	{
-		auto radians = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Sin(radians.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Cos:
-	{
-		auto radians = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Cos(radians.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Tan:
-	{
-		auto radians = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Tan(radians.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Asin:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Asin(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Acos:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Acos(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Atan:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Atan(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Sinh:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Sinh(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Cosh:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Cosh(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Tanh:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Tanh(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Asinh:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Asinh(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Acosh:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Acosh(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Atanh:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Atanh(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Atan2:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto y = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Atan2(x.Float(i), y.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Pow:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto y = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Pow(x.Float(i), y.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Exp:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Exp(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Log:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Log(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Exp2:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Exp2(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Log2:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Log2(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450Sqrt:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, Sqrt(val.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450InverseSqrt:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		Decorations d;
-		ApplyDecorationsForId(&d, insn.word(5));
-		if(d.RelaxedPrecision)
-		{
+			auto src = GenericValue(this, state, insn.word(5));
 			for(auto i = 0u; i < type.sizeInComponents; i++)
 			{
-				dst.move(i, RcpSqrt_pp(val.Float(i)));
+				dst.move(i, Abs(src.Float(i)));
 			}
+			break;
 		}
-		else
+		case GLSLstd450SAbs:
 		{
+			auto src = GenericValue(this, state, insn.word(5));
 			for(auto i = 0u; i < type.sizeInComponents; i++)
 			{
-				dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
-			}
-		}
-		break;
-	}
-	case GLSLstd450Determinant:
-	{
-		auto mat = GenericValue(this, state, insn.word(5));
-		auto numComponents = getType(mat.type).sizeInComponents;
-		switch(numComponents)
-		{
-		case 4: // 2x2
-			dst.move(0, Determinant(
-				mat.Float(0), mat.Float(1),
-				mat.Float(2), mat.Float(3)));
-			break;
-		case 9: // 3x3
-			dst.move(0, Determinant(
-				mat.Float(0), mat.Float(1), mat.Float(2),
-				mat.Float(3), mat.Float(4), mat.Float(5),
-				mat.Float(6), mat.Float(7), mat.Float(8)));
-			break;
-		case 16: // 4x4
-			dst.move(0, Determinant(
-				mat.Float(0),  mat.Float(1),  mat.Float(2),  mat.Float(3),
-				mat.Float(4),  mat.Float(5),  mat.Float(6),  mat.Float(7),
-				mat.Float(8),  mat.Float(9),  mat.Float(10), mat.Float(11),
-				mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
-			break;
-		default:
-			UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
-		}
-		break;
-	}
-	case GLSLstd450MatrixInverse:
-	{
-		auto mat = GenericValue(this, state, insn.word(5));
-		auto numComponents = getType(mat.type).sizeInComponents;
-		switch(numComponents)
-		{
-		case 4: // 2x2
-		{
-			auto inv = MatrixInverse(
-				mat.Float(0), mat.Float(1),
-				mat.Float(2), mat.Float(3));
-			for(uint32_t i = 0; i < inv.size(); i++)
-			{
-				dst.move(i, inv[i]);
+				dst.move(i, Abs(src.Int(i)));
 			}
 			break;
 		}
-		case 9: // 3x3
+		case GLSLstd450Cross:
 		{
-			auto inv = MatrixInverse(
-				mat.Float(0), mat.Float(1), mat.Float(2),
-				mat.Float(3), mat.Float(4), mat.Float(5),
-				mat.Float(6), mat.Float(7), mat.Float(8));
-			for(uint32_t i = 0; i < inv.size(); i++)
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
+			dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
+			dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
+			break;
+		}
+		case GLSLstd450Floor:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
 			{
-				dst.move(i, inv[i]);
+				dst.move(i, Floor(src.Float(i)));
 			}
 			break;
 		}
-		case 16: // 4x4
+		case GLSLstd450Trunc:
 		{
-			auto inv = MatrixInverse(
-				mat.Float(0),  mat.Float(1),  mat.Float(2),  mat.Float(3),
-				mat.Float(4),  mat.Float(5),  mat.Float(6),  mat.Float(7),
-				mat.Float(8),  mat.Float(9),  mat.Float(10), mat.Float(11),
-				mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
-			for(uint32_t i = 0; i < inv.size(); i++)
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
 			{
-				dst.move(i, inv[i]);
+				dst.move(i, Trunc(src.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Ceil:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Ceil(src.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Fract:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Frac(src.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Round:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Round(src.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450RoundEven:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto x = Round(src.Float(i));
+				// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
+				dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
+				                    SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
+			}
+			break;
+		}
+		case GLSLstd450FMin:
+		{
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450FMax:
+		{
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450SMin:
+		{
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
+			}
+			break;
+		}
+		case GLSLstd450SMax:
+		{
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
+			}
+			break;
+		}
+		case GLSLstd450UMin:
+		{
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
+			}
+			break;
+		}
+		case GLSLstd450UMax:
+		{
+			auto lhs = GenericValue(this, state, insn.word(5));
+			auto rhs = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
+			}
+			break;
+		}
+		case GLSLstd450Step:
+		{
+			auto edge = GenericValue(this, state, insn.word(5));
+			auto x = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
+			}
+			break;
+		}
+		case GLSLstd450SmoothStep:
+		{
+			auto edge0 = GenericValue(this, state, insn.word(5));
+			auto edge1 = GenericValue(this, state, insn.word(6));
+			auto x = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
+				                      (edge1.Float(i) - edge0.Float(i)),
+				                  SIMD::Float(0.0f)),
+				              SIMD::Float(1.0f));
+				dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
+			}
+			break;
+		}
+		case GLSLstd450FMix:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto y = GenericValue(this, state, insn.word(6));
+			auto a = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
+			}
+			break;
+		}
+		case GLSLstd450FClamp:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto minVal = GenericValue(this, state, insn.word(6));
+			auto maxVal = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450SClamp:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto minVal = GenericValue(this, state, insn.word(6));
+			auto maxVal = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
+			}
+			break;
+		}
+		case GLSLstd450UClamp:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto minVal = GenericValue(this, state, insn.word(6));
+			auto maxVal = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
+			}
+			break;
+		}
+		case GLSLstd450FSign:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
+				auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
+				dst.move(i, neg | pos);
+			}
+			break;
+		}
+		case GLSLstd450SSign:
+		{
+			auto src = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
+				auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
+				dst.move(i, neg | pos);
+			}
+			break;
+		}
+		case GLSLstd450Reflect:
+		{
+			auto I = GenericValue(this, state, insn.word(5));
+			auto N = GenericValue(this, state, insn.word(6));
+
+			SIMD::Float d = Dot(type.sizeInComponents, I, N);
+
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
+			}
+			break;
+		}
+		case GLSLstd450Refract:
+		{
+			auto I = GenericValue(this, state, insn.word(5));
+			auto N = GenericValue(this, state, insn.word(6));
+			auto eta = GenericValue(this, state, insn.word(7));
+
+			SIMD::Float d = Dot(type.sizeInComponents, I, N);
+			SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
+			SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
+			SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
+
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450FaceForward:
+		{
+			auto N = GenericValue(this, state, insn.word(5));
+			auto I = GenericValue(this, state, insn.word(6));
+			auto Nref = GenericValue(this, state, insn.word(7));
+
+			SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
+			SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
+
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto n = N.Float(i);
+				dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
+			}
+			break;
+		}
+		case GLSLstd450Length:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
+
+			dst.move(0, Sqrt(d));
+			break;
+		}
+		case GLSLstd450Normalize:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
+			SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
+
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, invLength * x.Float(i));
+			}
+			break;
+		}
+		case GLSLstd450Distance:
+		{
+			auto p0 = GenericValue(this, state, insn.word(5));
+			auto p1 = GenericValue(this, state, insn.word(6));
+			auto p0Type = getType(p0.type);
+
+			// sqrt(dot(p0-p1, p0-p1))
+			SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
+
+			for(auto i = 1u; i < p0Type.sizeInComponents; i++)
+			{
+				d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
+			}
+
+			dst.move(0, Sqrt(d));
+			break;
+		}
+		case GLSLstd450Modf:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			auto ptrId = Object::ID(insn.word(6));
+			auto ptrTy = getType(getObject(ptrId).type);
+			auto ptr = GetPointerToData(ptrId, 0, state);
+			bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+			// TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
+			// to be in bounds even for inactive lanes.
+			// - Clarify the SPIR-V spec.
+			// - Eliminate lane masking and assume interleaving.
+			auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
+
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				SIMD::Float whole, frac;
+				std::tie(whole, frac) = Modf(val.Float(i));
+				dst.move(i, frac);
+				auto p = ptr + (i * sizeof(float));
+				if(interleavedByLane) { p = InterleaveByLane(p); }
+				p.Store(whole, robustness, state->activeLaneMask());
+			}
+			break;
+		}
+		case GLSLstd450ModfStruct:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			auto valTy = getType(val.type);
+
+			for(auto i = 0u; i < valTy.sizeInComponents; i++)
+			{
+				SIMD::Float whole, frac;
+				std::tie(whole, frac) = Modf(val.Float(i));
+				dst.move(i, frac);
+				dst.move(i + valTy.sizeInComponents, whole);
+			}
+			break;
+		}
+		case GLSLstd450PackSnorm4x8:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			             SIMD::Int(0xFF)) |
+			                ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			                  SIMD::Int(0xFF))
+			                 << 8) |
+			                ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			                  SIMD::Int(0xFF))
+			                 << 16) |
+			                ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			                  SIMD::Int(0xFF))
+			                 << 24));
+			break;
+		}
+		case GLSLstd450PackUnorm4x8:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
+			                ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
+			                ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
+			                ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
+			break;
+		}
+		case GLSLstd450PackSnorm2x16:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
+			             SIMD::Int(0xFFFF)) |
+			                ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
+			                  SIMD::Int(0xFFFF))
+			                 << 16));
+			break;
+		}
+		case GLSLstd450PackUnorm2x16:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
+			             SIMD::UInt(0xFFFF)) |
+			                ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
+			                  SIMD::UInt(0xFFFF))
+			                 << 16));
+			break;
+		}
+		case GLSLstd450PackHalf2x16:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
+			break;
+		}
+		case GLSLstd450UnpackSnorm4x8:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			break;
+		}
+		case GLSLstd450UnpackUnorm4x8:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+			break;
+		}
+		case GLSLstd450UnpackSnorm2x16:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			// clamp(f / 32767.0, -1.0, 1.0)
+			dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
+			                        SIMD::Float(1.0f / float(0x7FFF0000)),
+			                    SIMD::Float(-1.0f)),
+			                SIMD::Float(1.0f)));
+			dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
+			                    SIMD::Float(-1.0f)),
+			                SIMD::Float(1.0f)));
+			break;
+		}
+		case GLSLstd450UnpackUnorm2x16:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			// f / 65535.0
+			dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
+			dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
+			break;
+		}
+		case GLSLstd450UnpackHalf2x16:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
+			dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
+			break;
+		}
+		case GLSLstd450Fma:
+		{
+			auto a = GenericValue(this, state, insn.word(5));
+			auto b = GenericValue(this, state, insn.word(6));
+			auto c = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Frexp:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			auto ptrId = Object::ID(insn.word(6));
+			auto ptrTy = getType(getObject(ptrId).type);
+			auto ptr = GetPointerToData(ptrId, 0, state);
+			bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+			// TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
+			// to be in bounds even for inactive lanes.
+			// - Clarify the SPIR-V spec.
+			// - Eliminate lane masking and assume interleaving.
+			auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
+
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				SIMD::Float significand;
+				SIMD::Int exponent;
+				std::tie(significand, exponent) = Frexp(val.Float(i));
+
+				dst.move(i, significand);
+
+				auto p = ptr + (i * sizeof(float));
+				if(interleavedByLane) { p = InterleaveByLane(p); }
+				p.Store(exponent, robustness, state->activeLaneMask());
+			}
+			break;
+		}
+		case GLSLstd450FrexpStruct:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			auto numComponents = getType(val.type).sizeInComponents;
+			for(auto i = 0u; i < numComponents; i++)
+			{
+				auto significandAndExponent = Frexp(val.Float(i));
+				dst.move(i, significandAndExponent.first);
+				dst.move(i + numComponents, significandAndExponent.second);
+			}
+			break;
+		}
+		case GLSLstd450Ldexp:
+		{
+			auto significand = GenericValue(this, state, insn.word(5));
+			auto exponent = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				// Assumes IEEE 754
+				auto in = significand.Float(i);
+				auto significandExponent = Exponent(in);
+				auto combinedExponent = exponent.Int(i) + significandExponent;
+				auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
+				auto isSignificandInf = SIMD::UInt(IsInf(in));
+				auto isSignificandNaN = SIMD::UInt(IsNan(in));
+				auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
+				auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
+				auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
+
+				SIMD::UInt v;
+				v = significand.UInt(i) & SIMD::UInt(0x7FFFFF);                          // Add significand.
+				v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23));  // Add exponent.
+				v &= isExponentInBounds;                                                 // Clear v if the exponent is OOB.
+
+				v |= significand.UInt(i) & SIMD::UInt(0x80000000);     // Add sign bit.
+				v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000);  // Mark as inf if the exponent is too great.
+
+				// If the input significand is zero, inf or nan, just return the
+				// input significand.
+				auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
+				v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
+
+				dst.move(i, As<SIMD::Float>(v));
+			}
+			break;
+		}
+		case GLSLstd450Radians:
+		{
+			auto degrees = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
+			}
+			break;
+		}
+		case GLSLstd450Degrees:
+		{
+			auto radians = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
+			}
+			break;
+		}
+		case GLSLstd450Sin:
+		{
+			auto radians = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Sin(radians.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Cos:
+		{
+			auto radians = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Cos(radians.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Tan:
+		{
+			auto radians = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Tan(radians.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Asin:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Asin(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Acos:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Acos(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Atan:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Atan(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Sinh:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Sinh(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Cosh:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Cosh(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Tanh:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Tanh(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Asinh:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Asinh(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Acosh:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Acosh(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Atanh:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Atanh(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Atan2:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto y = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Atan2(x.Float(i), y.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Pow:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto y = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Pow(x.Float(i), y.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Exp:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Exp(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Log:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Log(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Exp2:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Exp2(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Log2:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Log2(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450Sqrt:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, Sqrt(val.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450InverseSqrt:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			Decorations d;
+			ApplyDecorationsForId(&d, insn.word(5));
+			if(d.RelaxedPrecision)
+			{
+				for(auto i = 0u; i < type.sizeInComponents; i++)
+				{
+					dst.move(i, RcpSqrt_pp(val.Float(i)));
+				}
+			}
+			else
+			{
+				for(auto i = 0u; i < type.sizeInComponents; i++)
+				{
+					dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
+				}
+			}
+			break;
+		}
+		case GLSLstd450Determinant:
+		{
+			auto mat = GenericValue(this, state, insn.word(5));
+			auto numComponents = getType(mat.type).sizeInComponents;
+			switch(numComponents)
+			{
+				case 4:  // 2x2
+					dst.move(0, Determinant(
+					                mat.Float(0), mat.Float(1),
+					                mat.Float(2), mat.Float(3)));
+					break;
+				case 9:  // 3x3
+					dst.move(0, Determinant(
+					                mat.Float(0), mat.Float(1), mat.Float(2),
+					                mat.Float(3), mat.Float(4), mat.Float(5),
+					                mat.Float(6), mat.Float(7), mat.Float(8)));
+					break;
+				case 16:  // 4x4
+					dst.move(0, Determinant(
+					                mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
+					                mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
+					                mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
+					                mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
+					break;
+				default:
+					UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
+			}
+			break;
+		}
+		case GLSLstd450MatrixInverse:
+		{
+			auto mat = GenericValue(this, state, insn.word(5));
+			auto numComponents = getType(mat.type).sizeInComponents;
+			switch(numComponents)
+			{
+				case 4:  // 2x2
+				{
+					auto inv = MatrixInverse(
+					    mat.Float(0), mat.Float(1),
+					    mat.Float(2), mat.Float(3));
+					for(uint32_t i = 0; i < inv.size(); i++)
+					{
+						dst.move(i, inv[i]);
+					}
+					break;
+				}
+				case 9:  // 3x3
+				{
+					auto inv = MatrixInverse(
+					    mat.Float(0), mat.Float(1), mat.Float(2),
+					    mat.Float(3), mat.Float(4), mat.Float(5),
+					    mat.Float(6), mat.Float(7), mat.Float(8));
+					for(uint32_t i = 0; i < inv.size(); i++)
+					{
+						dst.move(i, inv[i]);
+					}
+					break;
+				}
+				case 16:  // 4x4
+				{
+					auto inv = MatrixInverse(
+					    mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
+					    mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
+					    mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
+					    mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
+					for(uint32_t i = 0; i < inv.size(); i++)
+					{
+						dst.move(i, inv[i]);
+					}
+					break;
+				}
+				default:
+					UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
+			}
+			break;
+		}
+		case GLSLstd450IMix:
+		{
+			UNREACHABLE("GLSLstd450IMix has been removed from the specification");
+			break;
+		}
+		case GLSLstd450PackDouble2x32:
+		{
+			UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
+			break;
+		}
+		case GLSLstd450UnpackDouble2x32:
+		{
+			UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
+			break;
+		}
+		case GLSLstd450FindILsb:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto v = val.UInt(i);
+				dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
+			}
+			break;
+		}
+		case GLSLstd450FindSMsb:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
+				dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
+			}
+			break;
+		}
+		case GLSLstd450FindUMsb:
+		{
+			auto val = GenericValue(this, state, insn.word(5));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
+			}
+			break;
+		}
+		case GLSLstd450InterpolateAtCentroid:
+		{
+			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+			break;
+		}
+		case GLSLstd450InterpolateAtSample:
+		{
+			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtSample)");
+			break;
+		}
+		case GLSLstd450InterpolateAtOffset:
+		{
+			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtOffset)");
+			break;
+		}
+		case GLSLstd450NMin:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto y = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, NMin(x.Float(i), y.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450NMax:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto y = GenericValue(this, state, insn.word(6));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, NMax(x.Float(i), y.Float(i)));
+			}
+			break;
+		}
+		case GLSLstd450NClamp:
+		{
+			auto x = GenericValue(this, state, insn.word(5));
+			auto minVal = GenericValue(this, state, insn.word(6));
+			auto maxVal = GenericValue(this, state, insn.word(7));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
+				dst.move(i, clamp);
 			}
 			break;
 		}
 		default:
-			UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
-		}
-		break;
-	}
-	case GLSLstd450IMix:
-	{
-		UNREACHABLE("GLSLstd450IMix has been removed from the specification");
-		break;
-	}
-	case GLSLstd450PackDouble2x32:
-	{
-		UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
-		break;
-	}
-	case GLSLstd450UnpackDouble2x32:
-	{
-		UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
-		break;
-	}
-	case GLSLstd450FindILsb:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto v = val.UInt(i);
-			dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
-		}
-		break;
-	}
-	case GLSLstd450FindSMsb:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
-			dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
-		}
-		break;
-	}
-	case GLSLstd450FindUMsb:
-	{
-		auto val = GenericValue(this, state, insn.word(5));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
-		}
-		break;
-	}
-	case GLSLstd450InterpolateAtCentroid:
-	{
-		UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
-		break;
-	}
-	case GLSLstd450InterpolateAtSample:
-	{
-		UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
-		break;
-	}
-	case GLSLstd450InterpolateAtOffset:
-	{
-		UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
-		break;
-	}
-	case GLSLstd450NMin:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto y = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, NMin(x.Float(i), y.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450NMax:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto y = GenericValue(this, state, insn.word(6));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, NMax(x.Float(i), y.Float(i)));
-		}
-		break;
-	}
-	case GLSLstd450NClamp:
-	{
-		auto x = GenericValue(this, state, insn.word(5));
-		auto minVal = GenericValue(this, state, insn.word(6));
-		auto maxVal = GenericValue(this, state, insn.word(7));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
-			dst.move(i, clamp);
-		}
-		break;
-	}
-	default:
-		UNREACHABLE("ExtInst %d", int(extInstIndex));
-		break;
+			UNREACHABLE("ExtInst %d", int(extInstIndex));
+			break;
 	}
 
 	return EmitResult::Continue;
diff --git a/src/Pipeline/SpirvShaderGroup.cpp b/src/Pipeline/SpirvShaderGroup.cpp
index cc46949..77b73ea 100644
--- a/src/Pipeline/SpirvShaderGroup.cpp
+++ b/src/Pipeline/SpirvShaderGroup.cpp
@@ -18,20 +18,21 @@
 
 namespace sw {
 
-struct SpirvShader::GroupOps {
+struct SpirvShader::GroupOps
+{
 
 	// Template function to perform a binary operation.
 	// |TYPE| should be the type of the identity value (as an SIMD::<Type>).
 	// |APPLY| should be a callable object that takes two RValue<TYPE> parameters
 	// and returns a new RValue<TYPE> corresponding to the operation's result.
-	template <typename TYPE, typename APPLY>
+	template<typename TYPE, typename APPLY>
 	static void BinaryOperation(
-		const SpirvShader*               shader,
-		const SpirvShader::InsnIterator& insn,
-		const SpirvShader::EmitState*    state,
-		Intermediate&                    dst,
-		const TYPE&                      identity,
-		APPLY&&                          apply)
+	    const SpirvShader *shader,
+	    const SpirvShader::InsnIterator &insn,
+	    const SpirvShader::EmitState *state,
+	    Intermediate &dst,
+	    const TYPE &identity,
+	    APPLY &&apply)
 	{
 		SpirvShader::GenericValue value(shader, state, insn.word(5));
 		auto &type = shader->getType(SpirvShader::Type::ID(insn.word(1)));
@@ -42,40 +43,38 @@
 			TYPE v = As<TYPE>(v_uint);
 			switch(spv::GroupOperation(insn.word(4)))
 			{
-			case spv::GroupOperationReduce:
-			{
-				// NOTE: floating-point add and multiply are not really commutative so
-				//       ensure that all values in the final lanes are identical
-				TYPE v2 = apply(v.xxzz,  v.yyww);   // [xy]   [xy]   [zw]   [zw]
-				TYPE v3 = apply(v2.xxxx, v2.zzzz);  // [xyzw] [xyzw] [xyzw] [xyzw]
-				dst.move(i, v3);
-				break;
-			}
-			case spv::GroupOperationInclusiveScan:
-			{
-				TYPE v2 = apply(v,  Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */);    // [x] [xy] [yz]  [zw]
-				TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
-				dst.move(i, v3);
-				break;
-			}
-			case spv::GroupOperationExclusiveScan:
-			{
-				TYPE v2 = apply(v,  Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */);    // [x] [xy] [yz]  [zw]
-				TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
-				auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */);           // [i] [x]  [xy]  [xyz]
-				dst.move(i, v4);
-				break;
-			}
-			default:
-				UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d",
-								SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4));
+				case spv::GroupOperationReduce:
+				{
+					// NOTE: floating-point add and multiply are not really commutative so
+					//       ensure that all values in the final lanes are identical
+					TYPE v2 = apply(v.xxzz, v.yyww);    // [xy]   [xy]   [zw]   [zw]
+					TYPE v3 = apply(v2.xxxx, v2.zzzz);  // [xyzw] [xyzw] [xyzw] [xyzw]
+					dst.move(i, v3);
+					break;
+				}
+				case spv::GroupOperationInclusiveScan:
+				{
+					TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */);      // [x] [xy] [yz]  [zw]
+					TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw]
+					dst.move(i, v3);
+					break;
+				}
+				case spv::GroupOperationExclusiveScan:
+				{
+					TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */);      // [x] [xy] [yz]  [zw]
+					TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw]
+					auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */);            // [i] [x]  [xy]  [xyz]
+					dst.move(i, v4);
+					break;
+				}
+				default:
+					UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d",
+					              SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4));
 			}
 		}
 	}
-
 };
 
-
 SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
 {
 	static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4");
@@ -89,420 +88,404 @@
 
 	switch(insn.opcode())
 	{
-	case spv::OpGroupNonUniformElect:
-	{
-		// Result is true only in the active invocation with the lowest id
-		// in the group, otherwise result is false.
-		SIMD::Int active = state->activeLaneMask();
-		// TODO: Would be nice if we could write this as:
-		//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
-		auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-		auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
-		dst.move(0, elect);
-		break;
-	}
-
-	case spv::OpGroupNonUniformAll:
-	{
-		GenericValue predicate(this, state, insn.word(4));
-		dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask())));
-		break;
-	}
-
-	case spv::OpGroupNonUniformAny:
-	{
-		GenericValue predicate(this, state, insn.word(4));
-		dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask())));
-		break;
-	}
-
-	case spv::OpGroupNonUniformAllEqual:
-	{
-		GenericValue value(this, state, insn.word(4));
-		auto res = SIMD::UInt(0xffffffff);
-		SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
-		SIMD::UInt inactive = ~active;
-		for(auto i = 0u; i < type.sizeInComponents; i++)
+		case spv::OpGroupNonUniformElect:
 		{
-			SIMD::UInt v = value.UInt(i) & active;
-			SIMD::UInt filled = v;
-			for(int j = 0; j < SIMD::Width - 1; j++)
+			// Result is true only in the active invocation with the lowest id
+			// in the group, otherwise result is false.
+			SIMD::Int active = state->activeLaneMask();
+			// TODO: Would be nice if we could write this as:
+			//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
+			auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+			auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
+			dst.move(0, elect);
+			break;
+		}
+
+		case spv::OpGroupNonUniformAll:
+		{
+			GenericValue predicate(this, state, insn.word(4));
+			dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask())));
+			break;
+		}
+
+		case spv::OpGroupNonUniformAny:
+		{
+			GenericValue predicate(this, state, insn.word(4));
+			dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask())));
+			break;
+		}
+
+		case spv::OpGroupNonUniformAllEqual:
+		{
+			GenericValue value(this, state, insn.word(4));
+			auto res = SIMD::UInt(0xffffffff);
+			SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
+			SIMD::UInt inactive = ~active;
+			for(auto i = 0u; i < type.sizeInComponents; i++)
 			{
-				filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value
+				SIMD::UInt v = value.UInt(i) & active;
+				SIMD::UInt filled = v;
+				for(int j = 0; j < SIMD::Width - 1; j++)
+				{
+					filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
+				}
+				res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
 			}
-			res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
-		}
-		dst.move(0, res);
-		break;
-	}
-
-	case spv::OpGroupNonUniformBroadcast:
-	{
-		auto valueId = Object::ID(insn.word(4));
-		auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
-		GenericValue value(this, state, valueId);
-		auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, OrAll(value.Int(i) & mask));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformBroadcastFirst:
-	{
-		auto valueId = Object::ID(insn.word(4));
-		GenericValue value(this, state, valueId);
-		// Result is true only in the active invocation with the lowest id
-		// in the group, otherwise result is false.
-		SIMD::Int active = state->activeLaneMask();
-		// TODO: Would be nice if we could write this as:
-		//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
-		auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-		auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			dst.move(i, OrAll(value.Int(i) & elect));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformBallot:
-	{
-		ASSERT(type.sizeInComponents == 4);
-		GenericValue predicate(this, state, insn.word(4));
-		dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
-		dst.move(1, SIMD::Int(0));
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(0));
-		break;
-	}
-
-	case spv::OpGroupNonUniformInverseBallot:
-	{
-		auto valueId = Object::ID(insn.word(4));
-		ASSERT(type.sizeInComponents == 1);
-		ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
-		GenericValue value(this, state, valueId);
-		auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
-		dst.move(0, -bit);
-		break;
-	}
-
-	case spv::OpGroupNonUniformBallotBitExtract:
-	{
-		auto valueId = Object::ID(insn.word(4));
-		auto indexId = Object::ID(insn.word(5));
-		ASSERT(type.sizeInComponents == 1);
-		ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
-		ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
-		GenericValue value(this, state, valueId);
-		GenericValue index(this, state, indexId);
-		auto vecIdx = index.Int(0) / SIMD::Int(32);
-		auto bitIdx = index.Int(0) & SIMD::Int(31);
-		auto bits =	(value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
-					(value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
-					(value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
-					(value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
-		dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
-		break;
-	}
-
-	case spv::OpGroupNonUniformBallotBitCount:
-	{
-		auto operation = spv::GroupOperation(insn.word(4));
-		auto valueId = Object::ID(insn.word(5));
-		ASSERT(type.sizeInComponents == 1);
-		ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
-		GenericValue value(this, state, valueId);
-		switch(operation)
-		{
-		case spv::GroupOperationReduce:
-			dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
+			dst.move(0, res);
 			break;
-		case spv::GroupOperationInclusiveScan:
-			dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
+		}
+
+		case spv::OpGroupNonUniformBroadcast:
+		{
+			auto valueId = Object::ID(insn.word(4));
+			auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
+			GenericValue value(this, state, valueId);
+			auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, OrAll(value.Int(i) & mask));
+			}
 			break;
-		case spv::GroupOperationExclusiveScan:
-			dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
+		}
+
+		case spv::OpGroupNonUniformBroadcastFirst:
+		{
+			auto valueId = Object::ID(insn.word(4));
+			GenericValue value(this, state, valueId);
+			// Result is true only in the active invocation with the lowest id
+			// in the group, otherwise result is false.
+			SIMD::Int active = state->activeLaneMask();
+			// TODO: Would be nice if we could write this as:
+			//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
+			auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+			auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				dst.move(i, OrAll(value.Int(i) & elect));
+			}
 			break;
+		}
+
+		case spv::OpGroupNonUniformBallot:
+		{
+			ASSERT(type.sizeInComponents == 4);
+			GenericValue predicate(this, state, insn.word(4));
+			dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
+			dst.move(1, SIMD::Int(0));
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(0));
+			break;
+		}
+
+		case spv::OpGroupNonUniformInverseBallot:
+		{
+			auto valueId = Object::ID(insn.word(4));
+			ASSERT(type.sizeInComponents == 1);
+			ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+			GenericValue value(this, state, valueId);
+			auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
+			dst.move(0, -bit);
+			break;
+		}
+
+		case spv::OpGroupNonUniformBallotBitExtract:
+		{
+			auto valueId = Object::ID(insn.word(4));
+			auto indexId = Object::ID(insn.word(5));
+			ASSERT(type.sizeInComponents == 1);
+			ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+			ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
+			GenericValue value(this, state, valueId);
+			GenericValue index(this, state, indexId);
+			auto vecIdx = index.Int(0) / SIMD::Int(32);
+			auto bitIdx = index.Int(0) & SIMD::Int(31);
+			auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
+			            (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
+			            (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
+			            (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
+			dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
+			break;
+		}
+
+		case spv::OpGroupNonUniformBallotBitCount:
+		{
+			auto operation = spv::GroupOperation(insn.word(4));
+			auto valueId = Object::ID(insn.word(5));
+			ASSERT(type.sizeInComponents == 1);
+			ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+			GenericValue value(this, state, valueId);
+			switch(operation)
+			{
+				case spv::GroupOperationReduce:
+					dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
+					break;
+				case spv::GroupOperationInclusiveScan:
+					dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
+					break;
+				case spv::GroupOperationExclusiveScan:
+					dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
+					break;
+				default:
+					UNSUPPORTED("GroupOperation %d", int(operation));
+			}
+			break;
+		}
+
+		case spv::OpGroupNonUniformBallotFindLSB:
+		{
+			auto valueId = Object::ID(insn.word(4));
+			ASSERT(type.sizeInComponents == 1);
+			ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+			GenericValue value(this, state, valueId);
+			dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
+			break;
+		}
+
+		case spv::OpGroupNonUniformBallotFindMSB:
+		{
+			auto valueId = Object::ID(insn.word(4));
+			ASSERT(type.sizeInComponents == 1);
+			ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+			GenericValue value(this, state, valueId);
+			dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
+			break;
+		}
+
+		case spv::OpGroupNonUniformShuffle:
+		{
+			GenericValue value(this, state, insn.word(4));
+			GenericValue id(this, state, insn.word(5));
+			auto x = CmpEQ(SIMD::Int(0), id.Int(0));
+			auto y = CmpEQ(SIMD::Int(1), id.Int(0));
+			auto z = CmpEQ(SIMD::Int(2), id.Int(0));
+			auto w = CmpEQ(SIMD::Int(3), id.Int(0));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				SIMD::Int v = value.Int(i);
+				dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
+			}
+			break;
+		}
+
+		case spv::OpGroupNonUniformShuffleXor:
+		{
+			GenericValue value(this, state, insn.word(4));
+			GenericValue mask(this, state, insn.word(5));
+			auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+			auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+			auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+			auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				SIMD::Int v = value.Int(i);
+				dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
+			}
+			break;
+		}
+
+		case spv::OpGroupNonUniformShuffleUp:
+		{
+			GenericValue value(this, state, insn.word(4));
+			GenericValue delta(this, state, insn.word(5));
+			auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
+			auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
+			auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
+			auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				SIMD::Int v = value.Int(i);
+				dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
+			}
+			break;
+		}
+
+		case spv::OpGroupNonUniformShuffleDown:
+		{
+			GenericValue value(this, state, insn.word(4));
+			GenericValue delta(this, state, insn.word(5));
+			auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
+			auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
+			auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
+			auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
+			for(auto i = 0u; i < type.sizeInComponents; i++)
+			{
+				SIMD::Int v = value.Int(i);
+				dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
+			}
+			break;
+		}
+
+		case spv::OpGroupNonUniformIAdd:
+		{
+			using Type = SIMD::Int;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0),
+			    [](RValue<Type> a, RValue<Type> b) { return a + b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformFAdd:
+		{
+			using Type = SIMD::Float;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0.),
+			    [](RValue<Type> a, RValue<Type> b) { return a + b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformIMul:
+		{
+			using Type = SIMD::Int;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(1),
+			    [](RValue<Type> a, RValue<Type> b) { return a * b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformFMul:
+		{
+			using Type = SIMD::Float;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(1.),
+			    [](RValue<Type> a, RValue<Type> b) { return a * b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformBitwiseAnd:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(~0u),
+			    [](RValue<Type> a, RValue<Type> b) { return a & b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformBitwiseOr:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0),
+			    [](RValue<Type> a, RValue<Type> b) { return a | b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformBitwiseXor:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0),
+			    [](RValue<Type> a, RValue<Type> b) { return a ^ b; });
+			break;
+		}
+
+		case spv::OpGroupNonUniformSMin:
+		{
+			using Type = SIMD::Int;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(INT32_MAX),
+			    [](RValue<Type> a, RValue<Type> b) { return Min(a, b); });
+			break;
+		}
+
+		case spv::OpGroupNonUniformUMin:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(~0u),
+			    [](RValue<Type> a, RValue<Type> b) { return Min(a, b); });
+			break;
+		}
+
+		case spv::OpGroupNonUniformFMin:
+		{
+			using Type = SIMD::Float;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type::positive_inf(),
+			    [](RValue<Type> a, RValue<Type> b) { return NMin(a, b); });
+			break;
+		}
+
+		case spv::OpGroupNonUniformSMax:
+		{
+			using Type = SIMD::Int;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(INT32_MIN),
+			    [](RValue<Type> a, RValue<Type> b) { return Max(a, b); });
+			break;
+		}
+
+		case spv::OpGroupNonUniformUMax:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0),
+			    [](RValue<Type> a, RValue<Type> b) { return Max(a, b); });
+			break;
+		}
+
+		case spv::OpGroupNonUniformFMax:
+		{
+			using Type = SIMD::Float;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type::negative_inf(),
+			    [](RValue<Type> a, RValue<Type> b) { return NMax(a, b); });
+			break;
+		}
+
+		case spv::OpGroupNonUniformLogicalAnd:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(~0u),
+			    [](RValue<Type> a, RValue<Type> b) {
+				    SIMD::UInt zero = SIMD::UInt(0);
+				    return CmpNEQ(a, zero) & CmpNEQ(b, zero);
+			    });
+			break;
+		}
+
+		case spv::OpGroupNonUniformLogicalOr:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0),
+			    [](RValue<Type> a, RValue<Type> b) {
+				    SIMD::UInt zero = SIMD::UInt(0);
+				    return CmpNEQ(a, zero) | CmpNEQ(b, zero);
+			    });
+			break;
+		}
+
+		case spv::OpGroupNonUniformLogicalXor:
+		{
+			using Type = SIMD::UInt;
+			SpirvShader::GroupOps::BinaryOperation(
+			    this, insn, state, dst,
+			    Type(0),
+			    [](RValue<Type> a, RValue<Type> b) {
+				    SIMD::UInt zero = SIMD::UInt(0);
+				    return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
+			    });
+			break;
+		}
+
 		default:
-			UNSUPPORTED("GroupOperation %d", int(operation));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformBallotFindLSB:
-	{
-		auto valueId = Object::ID(insn.word(4));
-		ASSERT(type.sizeInComponents == 1);
-		ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
-		GenericValue value(this, state, valueId);
-		dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
-		break;
-	}
-
-	case spv::OpGroupNonUniformBallotFindMSB:
-	{
-		auto valueId = Object::ID(insn.word(4));
-		ASSERT(type.sizeInComponents == 1);
-		ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
-		GenericValue value(this, state, valueId);
-		dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
-		break;
-	}
-
-	case spv::OpGroupNonUniformShuffle:
-	{
-		GenericValue value(this, state, insn.word(4));
-		GenericValue id(this, state, insn.word(5));
-		auto x = CmpEQ(SIMD::Int(0), id.Int(0));
-		auto y = CmpEQ(SIMD::Int(1), id.Int(0));
-		auto z = CmpEQ(SIMD::Int(2), id.Int(0));
-		auto w = CmpEQ(SIMD::Int(3), id.Int(0));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			SIMD::Int v = value.Int(i);
-			dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformShuffleXor:
-	{
-		GenericValue value(this, state, insn.word(4));
-		GenericValue mask(this, state, insn.word(5));
-		auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
-		auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
-		auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
-		auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			SIMD::Int v = value.Int(i);
-			dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformShuffleUp:
-	{
-		GenericValue value(this, state, insn.word(4));
-		GenericValue delta(this, state, insn.word(5));
-		auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
-		auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
-		auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
-		auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			SIMD::Int v = value.Int(i);
-			dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformShuffleDown:
-	{
-		GenericValue value(this, state, insn.word(4));
-		GenericValue delta(this, state, insn.word(5));
-		auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
-		auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
-		auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
-		auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
-		for(auto i = 0u; i < type.sizeInComponents; i++)
-		{
-			SIMD::Int v = value.Int(i);
-			dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
-		}
-		break;
-	}
-
-	case spv::OpGroupNonUniformIAdd:
-	{
-		using Type = SIMD::Int;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0),
-				[](RValue<Type>a, RValue<Type>b){ return a + b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformFAdd:
-	{
-		using Type = SIMD::Float;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0.),
-				[](RValue<Type>a, RValue<Type>b){ return a + b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformIMul:
-	{
-		using Type = SIMD::Int;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(1),
-				[](RValue<Type>a, RValue<Type>b){ return a * b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformFMul:
-	{
-		using Type = SIMD::Float;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(1.),
-				[](RValue<Type>a, RValue<Type>b){ return a * b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformBitwiseAnd:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(~0u),
-				[](RValue<Type>a, RValue<Type>b){ return a & b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformBitwiseOr:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0),
-				[](RValue<Type>a, RValue<Type>b){ return a | b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformBitwiseXor:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0),
-				[](RValue<Type>a, RValue<Type>b){ return a ^ b; }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformSMin:
-	{
-		using Type = SIMD::Int;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(INT32_MAX),
-				[](RValue<Type>a, RValue<Type>b){ return Min(a, b); }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformUMin:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(~0u),
-				[](RValue<Type>a, RValue<Type>b){ return Min(a, b); }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformFMin:
-	{
-		using Type = SIMD::Float;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type::positive_inf(),
-				[](RValue<Type>a, RValue<Type>b){ return NMin(a, b); }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformSMax:
-	{
-		using Type = SIMD::Int;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(INT32_MIN),
-				[](RValue<Type>a, RValue<Type>b){ return Max(a, b); }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformUMax:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0),
-				[](RValue<Type>a, RValue<Type>b){ return Max(a, b); }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformFMax:
-	{
-		using Type = SIMD::Float;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type::negative_inf(),
-				[](RValue<Type>a, RValue<Type>b){ return NMax(a, b); }
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformLogicalAnd:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(~0u),
-				[](RValue<Type>a, RValue<Type>b){
-					SIMD::UInt zero = SIMD::UInt(0);
-					return CmpNEQ(a, zero) & CmpNEQ(b, zero);
-				}
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformLogicalOr:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0),
-				[](RValue<Type>a, RValue<Type>b){
-					SIMD::UInt zero = SIMD::UInt(0);
-					return CmpNEQ(a, zero) | CmpNEQ(b, zero);
-				}
-		);
-		break;
-	}
-
-	case spv::OpGroupNonUniformLogicalXor:
-	{
-		using Type = SIMD::UInt;
-		SpirvShader::GroupOps::BinaryOperation(
-				this, insn, state, dst,
-				Type(0),
-				[](RValue<Type>a, RValue<Type>b){
-					SIMD::UInt zero = SIMD::UInt(0);
-					return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
-				}
-		);
-		break;
-	}
-
-	default:
-		UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
+			UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
 	}
 	return EmitResult::Continue;
 }
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index 5e7edcf..1ec6f89 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -27,26 +27,26 @@
 {
 	switch(format)
 	{
-	case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
-	case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
-	case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
-	case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
-	case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
-	case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
-	case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
-	case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
-	case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
-	case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
-	case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
-	case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
-	case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
-	case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
-	case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
-	case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
+		case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
+		case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
+		case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
+		case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
+		case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
+		case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
+		case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
+		case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
+		case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
+		case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
+		case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
+		case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
+		case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
+		case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
+		case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
+		case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
 
-	default:
-		UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
-		return VK_FORMAT_UNDEFINED;
+		default:
+			UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
+			return VK_FORMAT_UNDEFINED;
 	}
 }
 
@@ -57,21 +57,21 @@
 
 	sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
 
-	return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));   // TODO: IfThenElse()
+	return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));  // TODO: IfThenElse()
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 namespace sw {
 
 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
 {
-	return EmitImageSample({variant, Implicit}, insn, state);
+	return EmitImageSample({ variant, Implicit }, insn, state);
 }
 
 SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
 {
-	ImageInstruction instruction = {variant, Gather};
+	ImageInstruction instruction = { variant, Gather };
 	instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;
 
 	return EmitImageSample(instruction, insn, state);
@@ -85,19 +85,20 @@
 
 	if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
 	{
-		return EmitImageSample({variant, Lod}, insn, state);
+		return EmitImageSample({ variant, Lod }, insn, state);
 	}
 	else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
 	{
-		return EmitImageSample({variant, Grad}, insn, state);
+		return EmitImageSample({ variant, Grad }, insn, state);
 	}
-	else UNIMPLEMENTED("Image Operands %x", imageOperands);
+	else
+		UNIMPLEMENTED("Image Operands %x", imageOperands);
 	return EmitResult::Continue;
 }
 
 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
 {
-	return EmitImageSample({None, Fetch}, insn, state);
+	return EmitImageSample({ None, Fetch }, insn, state);
 }
 
 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
@@ -109,18 +110,17 @@
 	auto &resultType = getType(resultTypeId);
 
 	auto &result = state->createIntermediate(resultId, resultType.sizeInComponents);
-	auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
+	auto imageDescriptor = state->getPointer(sampledImageId).base;  // vk::SampledImageDescriptor*
 
 	// If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
 	auto &sampledImage = getObject(sampledImageId);
-	auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ?
-			state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
+	auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ? state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
 
 	auto coordinate = GenericValue(this, state, coordinateId);
 	auto &coordinateType = getType(coordinate.type);
 
-	Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
-	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);  // sw::Texture*
+	Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler);  // vk::Sampler*
+	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);    // sw::Texture*
 
 	// Above we assumed that if the SampledImage operand is not the result of an OpSampledImage,
 	// it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch
@@ -206,7 +206,7 @@
 	instruction.coordinates = coordinates;
 
 	uint32_t i = 0;
-	for( ; i < coordinates; i++)
+	for(; i < coordinates; i++)
 	{
 		if(instruction.isProj())
 		{
@@ -335,7 +335,7 @@
 
 SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const
 {
-	return EmitImageSample({None, Query}, insn, state);
+	return EmitImageSample({ None, Query }, insn, state);
 }
 
 void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
@@ -359,23 +359,23 @@
 
 	switch(bindingLayout.descriptorType)
 	{
-	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-	{
-		extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
-		arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
-		break;
-	}
-	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-	{
-		extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
-		arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
-		break;
-	}
-	default:
-		UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+		{
+			extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent);                           // int[3]*
+			arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers));  // uint32_t
+			break;
+		}
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+		{
+			extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent);                           // int[3]*
+			arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers));  // uint32_t
+			break;
+		}
+		default:
+			UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
 	}
 
 	auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
@@ -421,13 +421,13 @@
 	Int mipLevels = 0;
 	switch(bindingLayout.descriptorType)
 	{
-	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-		mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
-		break;
-	default:
-		UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+			mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
+			break;
+		default:
+			UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
 	}
 
 	auto &dst = state->createIntermediate(resultId, 1);
@@ -455,16 +455,16 @@
 	Int sampleCount = 0;
 	switch(bindingLayout.descriptorType)
 	{
-	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
-		break;
-	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
-		break;
-	default:
-		UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+			sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
+			break;
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+			sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
+			break;
+		default:
+			UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
 	}
 
 	auto &dst = state->createIntermediate(resultId, 1);
@@ -473,7 +473,7 @@
 	return EmitResult::Continue;
 }
 
-SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
+SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
 {
 	auto routine = state->routine;
 	bool isArrayed = imageType.definition.word(5) != 0;
@@ -495,16 +495,16 @@
 	}
 
 	auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
-															? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
-															: OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
+	                                                          ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
+	                                                          : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
 	auto slicePitch = SIMD::Int(
-			*Pointer<Int>(descriptor + (useStencilAspect
-										? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
-										: OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
+	    *Pointer<Int>(descriptor + (useStencilAspect
+	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
+	                                    : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
 	auto samplePitch = SIMD::Int(
-			*Pointer<Int>(descriptor + (useStencilAspect
-										? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
-										: OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
+	    *Pointer<Int>(descriptor + (useStencilAspect
+	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
+	                                    : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
 
 	ptr += u * SIMD::Int(texelSize);
 	if(dims > 1)
@@ -568,13 +568,13 @@
 	// For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
 	// the renderpass data instead. In all other cases, we can use the format in the instruction.
 	auto vkFormat = (dim == spv::DimSubpassData)
-					? inputAttachmentFormats[d.InputAttachmentIndex]
-					: SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
+	                    ? inputAttachmentFormats[d.InputAttachmentIndex]
+	                    : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
 
 	// Depth+Stencil image attachments select aspect based on the Sampled Type of the
 	// OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
 	auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
-			getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
+	                         getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
 
 	if(useStencilAspect)
 	{
@@ -584,8 +584,8 @@
 	auto pointer = state->getPointer(imageId);
 	Pointer<Byte> binding = pointer.base;
 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
-			? OFFSET(vk::StorageImageDescriptor, stencilPtr)
-			: OFFSET(vk::StorageImageDescriptor, ptr)));
+	                                                                 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
+	                                                                 : OFFSET(vk::StorageImageDescriptor, ptr)));
 
 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
 
@@ -604,7 +604,7 @@
 	// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
 	// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
 	// TODO: specialize for small formats?
-	for(auto i = 0; i < (texelSize + 3)/4; i++)
+	for(auto i = 0; i < (texelSize + 3) / 4; i++)
 	{
 		packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
 		texelPtr += sizeof(float);
@@ -615,214 +615,214 @@
 	// - Any format supported as a color or depth/stencil attachment, for input attachments
 	switch(vkFormat)
 	{
-	case VK_FORMAT_R32G32B32A32_SFLOAT:
-	case VK_FORMAT_R32G32B32A32_SINT:
-	case VK_FORMAT_R32G32B32A32_UINT:
-		dst.move(0, packed[0]);
-		dst.move(1, packed[1]);
-		dst.move(2, packed[2]);
-		dst.move(3, packed[3]);
-		break;
-	case VK_FORMAT_R32_SINT:
-	case VK_FORMAT_R32_UINT:
-		dst.move(0, packed[0]);
-		// Fill remaining channels with 0,0,1 (of the correct type)
-		dst.move(1, SIMD::Int(0));
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(1));
-		break;
-	case VK_FORMAT_R32_SFLOAT:
-	case VK_FORMAT_D32_SFLOAT:
-	case VK_FORMAT_D32_SFLOAT_S8_UINT:
-		dst.move(0, packed[0]);
-		// Fill remaining channels with 0,0,1 (of the correct type)
-		dst.move(1, SIMD::Float(0));
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_D16_UNORM:
-		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
-		dst.move(1, SIMD::Float(0));
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_R16G16B16A16_SINT:
-		dst.move(0, (packed[0] << 16) >> 16);
-		dst.move(1, (packed[0]) >> 16);
-		dst.move(2, (packed[1] << 16) >> 16);
-		dst.move(3, (packed[1]) >> 16);
-		break;
-	case VK_FORMAT_R16G16B16A16_UINT:
-		dst.move(0, packed[0] & SIMD::Int(0xffff));
-		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
-		dst.move(2, packed[1] & SIMD::Int(0xffff));
-		dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
-		break;
-	case VK_FORMAT_R16G16B16A16_SFLOAT:
-		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
-		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
-		dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
-		dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
-		break;
-	case VK_FORMAT_R8G8B8A8_SNORM:
-		dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
-		break;
-	case VK_FORMAT_R8G8B8A8_UNORM:
-	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-		dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		break;
-	case VK_FORMAT_R8G8B8A8_SRGB:
-	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
-		dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
-		dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
-		dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
-		dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		break;
-	case VK_FORMAT_B8G8R8A8_UNORM:
-		dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		break;
-	case VK_FORMAT_B8G8R8A8_SRGB:
-		dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
-		dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
-		dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
-		dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		break;
-	case VK_FORMAT_R8G8B8A8_UINT:
-	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-		dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
-		dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
-		dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF)));
-		dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF)));
-		break;
-	case VK_FORMAT_R8G8B8A8_SINT:
-	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-		dst.move(0, (packed[0] << 24) >> 24);
-		dst.move(1, (packed[0] << 16) >> 24);
-		dst.move(2, (packed[0] << 8) >> 24);
-		dst.move(3, (packed[0]) >> 24);
-		break;
-	case VK_FORMAT_R8_UNORM:
-		dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(1, SIMD::Float(0));
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_R8_UINT:
-	case VK_FORMAT_S8_UINT:
-		dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
-		dst.move(1, SIMD::UInt(0));
-		dst.move(2, SIMD::UInt(0));
-		dst.move(3, SIMD::UInt(1));
-		break;
-	case VK_FORMAT_R8_SINT:
-		dst.move(0, (packed[0] << 24) >> 24);
-		dst.move(1, SIMD::Int(0));
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(1));
-		break;
-	case VK_FORMAT_R8G8_UNORM:
-		dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_R8G8_UINT:
-		dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
-		dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
-		dst.move(2, SIMD::UInt(0));
-		dst.move(3, SIMD::UInt(1));
-		break;
-	case VK_FORMAT_R8G8_SINT:
-		dst.move(0, (packed[0] << 24) >> 24);
-		dst.move(1, (packed[0] << 16) >> 24);
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(1));
-		break;
-	case VK_FORMAT_R16_SFLOAT:
-		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
-		dst.move(1, SIMD::Float(0));
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_R16_UINT:
-		dst.move(0, packed[0] & SIMD::Int(0xffff));
-		dst.move(1, SIMD::UInt(0));
-		dst.move(2, SIMD::UInt(0));
-		dst.move(3, SIMD::UInt(1));
-		break;
-	case VK_FORMAT_R16_SINT:
-		dst.move(0, (packed[0] << 16) >> 16);
-		dst.move(1, SIMD::Int(0));
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(1));
-		break;
-	case VK_FORMAT_R16G16_SFLOAT:
-		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
-		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_R16G16_UINT:
-		dst.move(0, packed[0] & SIMD::Int(0xffff));
-		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
-		dst.move(2, SIMD::UInt(0));
-		dst.move(3, SIMD::UInt(1));
-		break;
-	case VK_FORMAT_R16G16_SINT:
-		dst.move(0, (packed[0] << 16) >> 16);
-		dst.move(1, (packed[0]) >> 16);
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(1));
-		break;
-	case VK_FORMAT_R32G32_SINT:
-	case VK_FORMAT_R32G32_UINT:
-		dst.move(0, packed[0]);
-		dst.move(1, packed[1]);
-		dst.move(2, SIMD::Int(0));
-		dst.move(3, SIMD::Int(1));
-		break;
-	case VK_FORMAT_R32G32_SFLOAT:
-		dst.move(0, packed[0]);
-		dst.move(1, packed[1]);
-		dst.move(2, SIMD::Float(0));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
-		dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
-		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
-		dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
-		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
-		break;
-	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
-		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
-		dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
-		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
-		break;
-	case VK_FORMAT_R5G6B5_UNORM_PACK16:
-		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
-		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
-		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
-		dst.move(3, SIMD::Float(1));
-		break;
-	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
-		dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
-		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
-		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
-		dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
-		break;
-	default:
-		UNIMPLEMENTED("VkFormat %d", int(vkFormat));
-		break;
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+			dst.move(0, packed[0]);
+			dst.move(1, packed[1]);
+			dst.move(2, packed[2]);
+			dst.move(3, packed[3]);
+			break;
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32_UINT:
+			dst.move(0, packed[0]);
+			// Fill remaining channels with 0,0,1 (of the correct type)
+			dst.move(1, SIMD::Int(0));
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(1));
+			break;
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_D32_SFLOAT:
+		case VK_FORMAT_D32_SFLOAT_S8_UINT:
+			dst.move(0, packed[0]);
+			// Fill remaining channels with 0,0,1 (of the correct type)
+			dst.move(1, SIMD::Float(0));
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_D16_UNORM:
+			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
+			dst.move(1, SIMD::Float(0));
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_R16G16B16A16_SINT:
+			dst.move(0, (packed[0] << 16) >> 16);
+			dst.move(1, (packed[0]) >> 16);
+			dst.move(2, (packed[1] << 16) >> 16);
+			dst.move(3, (packed[1]) >> 16);
+			break;
+		case VK_FORMAT_R16G16B16A16_UINT:
+			dst.move(0, packed[0] & SIMD::Int(0xffff));
+			dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
+			dst.move(2, packed[1] & SIMD::Int(0xffff));
+			dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
+			break;
+		case VK_FORMAT_R16G16B16A16_SFLOAT:
+			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
+			dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
+			dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
+			dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
+			break;
+		case VK_FORMAT_R8G8B8A8_SNORM:
+			dst.move(0, Min(Max(SIMD::Float(((packed[0] << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			dst.move(1, Min(Max(SIMD::Float(((packed[0] << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			dst.move(2, Min(Max(SIMD::Float(((packed[0] << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+			break;
+		case VK_FORMAT_R8G8B8A8_UNORM:
+		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(2, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			break;
+		case VK_FORMAT_R8G8B8A8_SRGB:
+		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+			dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+			dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			break;
+		case VK_FORMAT_B8G8R8A8_UNORM:
+			dst.move(0, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			break;
+		case VK_FORMAT_B8G8R8A8_SRGB:
+			dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+			dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+			dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+			dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			break;
+		case VK_FORMAT_R8G8B8A8_UINT:
+		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+			dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
+			dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF)));
+			dst.move(2, ((As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF)));
+			dst.move(3, ((As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF)));
+			break;
+		case VK_FORMAT_R8G8B8A8_SINT:
+		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			dst.move(0, (packed[0] << 24) >> 24);
+			dst.move(1, (packed[0] << 16) >> 24);
+			dst.move(2, (packed[0] << 8) >> 24);
+			dst.move(3, (packed[0]) >> 24);
+			break;
+		case VK_FORMAT_R8_UNORM:
+			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(1, SIMD::Float(0));
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_R8_UINT:
+		case VK_FORMAT_S8_UINT:
+			dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
+			dst.move(1, SIMD::UInt(0));
+			dst.move(2, SIMD::UInt(0));
+			dst.move(3, SIMD::UInt(1));
+			break;
+		case VK_FORMAT_R8_SINT:
+			dst.move(0, (packed[0] << 24) >> 24);
+			dst.move(1, SIMD::Int(0));
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(1));
+			break;
+		case VK_FORMAT_R8G8_UNORM:
+			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_R8G8_UINT:
+			dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
+			dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF)));
+			dst.move(2, SIMD::UInt(0));
+			dst.move(3, SIMD::UInt(1));
+			break;
+		case VK_FORMAT_R8G8_SINT:
+			dst.move(0, (packed[0] << 24) >> 24);
+			dst.move(1, (packed[0] << 16) >> 24);
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(1));
+			break;
+		case VK_FORMAT_R16_SFLOAT:
+			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
+			dst.move(1, SIMD::Float(0));
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_R16_UINT:
+			dst.move(0, packed[0] & SIMD::Int(0xffff));
+			dst.move(1, SIMD::UInt(0));
+			dst.move(2, SIMD::UInt(0));
+			dst.move(3, SIMD::UInt(1));
+			break;
+		case VK_FORMAT_R16_SINT:
+			dst.move(0, (packed[0] << 16) >> 16);
+			dst.move(1, SIMD::Int(0));
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(1));
+			break;
+		case VK_FORMAT_R16G16_SFLOAT:
+			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
+			dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_R16G16_UINT:
+			dst.move(0, packed[0] & SIMD::Int(0xffff));
+			dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
+			dst.move(2, SIMD::UInt(0));
+			dst.move(3, SIMD::UInt(1));
+			break;
+		case VK_FORMAT_R16G16_SINT:
+			dst.move(0, (packed[0] << 16) >> 16);
+			dst.move(1, (packed[0]) >> 16);
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(1));
+			break;
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32_UINT:
+			dst.move(0, packed[0]);
+			dst.move(1, packed[1]);
+			dst.move(2, SIMD::Int(0));
+			dst.move(3, SIMD::Int(1));
+			break;
+		case VK_FORMAT_R32G32_SFLOAT:
+			dst.move(0, packed[0]);
+			dst.move(1, packed[1]);
+			dst.move(2, SIMD::Float(0));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+			dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
+			dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
+			dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
+			dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
+			break;
+		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+			dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+			dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+			dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+			dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
+			break;
+		case VK_FORMAT_R5G6B5_UNORM_PACK16:
+			dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
+			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+			dst.move(3, SIMD::Float(1));
+			break;
+		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+			dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+			dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
+			break;
+		default:
+			UNIMPLEMENTED("VkFormat %d", int(vkFormat));
+			break;
 	}
 
 	return EmitResult::Continue;
@@ -852,103 +852,106 @@
 	auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
 	switch(format)
 	{
-	case spv::ImageFormatRgba32f:
-	case spv::ImageFormatRgba32i:
-	case spv::ImageFormatRgba32ui:
-		texelSize = 16;
-		packed[0] = texel.Int(0);
-		packed[1] = texel.Int(1);
-		packed[2] = texel.Int(2);
-		packed[3] = texel.Int(3);
-		numPackedElements = 4;
-		break;
-	case spv::ImageFormatR32f:
-	case spv::ImageFormatR32i:
-	case spv::ImageFormatR32ui:
-		texelSize = 4;
-		packed[0] = texel.Int(0);
-		numPackedElements = 1;
-		break;
-	case spv::ImageFormatRgba8:
-		texelSize = 4;
-		packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
-			((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
-			((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
-			((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
-		numPackedElements = 1;
-		break;
-	case spv::ImageFormatRgba8Snorm:
-		texelSize = 4;
-		packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) |
-					((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) << 8) |
-					((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) << 16) |
-					((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
-						SIMD::Int(0xFF)) << 24);
-		numPackedElements = 1;
-		break;
-	case spv::ImageFormatRgba8i:
-	case spv::ImageFormatRgba8ui:
-		texelSize = 4;
-		packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
-					(SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
-					(SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
-					(SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
-		numPackedElements = 1;
-		break;
-	case spv::ImageFormatRgba16f:
-		texelSize = 8;
-		packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
-		packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
-		numPackedElements = 2;
-		break;
-	case spv::ImageFormatRgba16i:
-	case spv::ImageFormatRgba16ui:
-		texelSize = 8;
-		packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
-		packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
-		numPackedElements = 2;
-		break;
-	case spv::ImageFormatRg32f:
-	case spv::ImageFormatRg32i:
-	case spv::ImageFormatRg32ui:
-		texelSize = 8;
-		packed[0] = texel.Int(0);
-		packed[1] = texel.Int(1);
-		numPackedElements = 2;
-		break;
+		case spv::ImageFormatRgba32f:
+		case spv::ImageFormatRgba32i:
+		case spv::ImageFormatRgba32ui:
+			texelSize = 16;
+			packed[0] = texel.Int(0);
+			packed[1] = texel.Int(1);
+			packed[2] = texel.Int(2);
+			packed[3] = texel.Int(3);
+			numPackedElements = 4;
+			break;
+		case spv::ImageFormatR32f:
+		case spv::ImageFormatR32i:
+		case spv::ImageFormatR32ui:
+			texelSize = 4;
+			packed[0] = texel.Int(0);
+			numPackedElements = 1;
+			break;
+		case spv::ImageFormatRgba8:
+			texelSize = 4;
+			packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
+			            ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
+			            ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
+			            ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
+			numPackedElements = 1;
+			break;
+		case spv::ImageFormatRgba8Snorm:
+			texelSize = 4;
+			packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			             SIMD::Int(0xFF)) |
+			            ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			              SIMD::Int(0xFF))
+			             << 8) |
+			            ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			              SIMD::Int(0xFF))
+			             << 16) |
+			            ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+			              SIMD::Int(0xFF))
+			             << 24);
+			numPackedElements = 1;
+			break;
+		case spv::ImageFormatRgba8i:
+		case spv::ImageFormatRgba8ui:
+			texelSize = 4;
+			packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
+			            (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
+			            (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
+			            (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
+			numPackedElements = 1;
+			break;
+		case spv::ImageFormatRgba16f:
+			texelSize = 8;
+			packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
+			packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
+			numPackedElements = 2;
+			break;
+		case spv::ImageFormatRgba16i:
+		case spv::ImageFormatRgba16ui:
+			texelSize = 8;
+			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
+			packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
+			numPackedElements = 2;
+			break;
+		case spv::ImageFormatRg32f:
+		case spv::ImageFormatRg32i:
+		case spv::ImageFormatRg32ui:
+			texelSize = 8;
+			packed[0] = texel.Int(0);
+			packed[1] = texel.Int(1);
+			numPackedElements = 2;
+			break;
 
-	case spv::ImageFormatRg16f:
-	case spv::ImageFormatR11fG11fB10f:
-	case spv::ImageFormatR16f:
-	case spv::ImageFormatRgba16:
-	case spv::ImageFormatRgb10A2:
-	case spv::ImageFormatRg16:
-	case spv::ImageFormatRg8:
-	case spv::ImageFormatR16:
-	case spv::ImageFormatR8:
-	case spv::ImageFormatRgba16Snorm:
-	case spv::ImageFormatRg16Snorm:
-	case spv::ImageFormatRg8Snorm:
-	case spv::ImageFormatR16Snorm:
-	case spv::ImageFormatR8Snorm:
-	case spv::ImageFormatRg16i:
-	case spv::ImageFormatRg8i:
-	case spv::ImageFormatR16i:
-	case spv::ImageFormatR8i:
-	case spv::ImageFormatRgb10a2ui:
-	case spv::ImageFormatRg16ui:
-	case spv::ImageFormatRg8ui:
-	case spv::ImageFormatR16ui:
-	case spv::ImageFormatR8ui:
-		UNIMPLEMENTED("spv::ImageFormat %d", int(format));
-		break;
+		case spv::ImageFormatRg16f:
+		case spv::ImageFormatR11fG11fB10f:
+		case spv::ImageFormatR16f:
+		case spv::ImageFormatRgba16:
+		case spv::ImageFormatRgb10A2:
+		case spv::ImageFormatRg16:
+		case spv::ImageFormatRg8:
+		case spv::ImageFormatR16:
+		case spv::ImageFormatR8:
+		case spv::ImageFormatRgba16Snorm:
+		case spv::ImageFormatRg16Snorm:
+		case spv::ImageFormatRg8Snorm:
+		case spv::ImageFormatR16Snorm:
+		case spv::ImageFormatR8Snorm:
+		case spv::ImageFormatRg16i:
+		case spv::ImageFormatRg8i:
+		case spv::ImageFormatR16i:
+		case spv::ImageFormatR8i:
+		case spv::ImageFormatRgb10a2ui:
+		case spv::ImageFormatRg16ui:
+		case spv::ImageFormatRg8ui:
+		case spv::ImageFormatR16ui:
+		case spv::ImageFormatR8ui:
+			UNIMPLEMENTED("spv::ImageFormat %d", int(format));
+			break;
 
-	default:
-		UNREACHABLE("spv::ImageFormat %d", int(format));
-		break;
+		default:
+			UNREACHABLE("spv::ImageFormat %d", int(format));
+			break;
 	}
 
 	auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp
index c172d3c..8df565f 100644
--- a/src/Pipeline/SpirvShaderMemory.cpp
+++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -57,8 +57,7 @@
 	auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
 	auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
 
-	VisitMemoryObject(pointerId, [&](const MemoryElement& el)
-	{
+	VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
 		auto p = ptr + el.offset;
 		if(interleavedByLane) { p = InterleaveByLane(p); }  // TODO: Interleave once, then add offset?
 		dst.move(el.index, p.Load<SIMD::Float>(robustness, state->activeLaneMask(), atomic, memoryOrder));
@@ -101,8 +100,7 @@
 	{
 		// Constant source data.
 		const uint32_t *src = object.constantValue.get();
-		VisitMemoryObject(pointerId, [&](const MemoryElement& el)
-		{
+		VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
 			auto p = ptr + el.offset;
 			if(interleavedByLane) { p = InterleaveByLane(p); }
 			p.Store(SIMD::Int(src[el.index]), robustness, mask, atomic, memoryOrder);
@@ -112,8 +110,7 @@
 	{
 		// Intermediate source data.
 		auto &src = state->getIntermediate(objectId);
-		VisitMemoryObject(pointerId, [&](const MemoryElement& el)
-		{
+		VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
 			auto p = ptr + el.offset;
 			if(interleavedByLane) { p = InterleaveByLane(p); }
 			p.Store(src.Float(el.index), robustness, mask, atomic, memoryOrder);
@@ -132,95 +129,95 @@
 
 	switch(objectTy.storageClass)
 	{
-	case spv::StorageClassOutput:
-	case spv::StorageClassPrivate:
-	case spv::StorageClassFunction:
-	{
-		ASSERT(objectTy.opcode() == spv::OpTypePointer);
-		auto base = &routine->getVariable(resultId)[0];
-		auto elementTy = getType(objectTy.element);
-		auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
-		state->createPointer(resultId, SIMD::Pointer(base, size));
-		break;
-	}
-	case spv::StorageClassWorkgroup:
-	{
-		ASSERT(objectTy.opcode() == spv::OpTypePointer);
-		auto base = &routine->workgroupMemory[0];
-		auto size = workgroupMemory.size();
-		state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
-		break;
-	}
-	case spv::StorageClassInput:
-	{
-		if(object.kind == Object::Kind::InterfaceVariable)
+		case spv::StorageClassOutput:
+		case spv::StorageClassPrivate:
+		case spv::StorageClassFunction:
 		{
-			auto &dst = routine->getVariable(resultId);
-			int offset = 0;
-			VisitInterface(resultId,
-							[&](Decorations const &d, AttribType type) {
-								auto scalarSlot = d.Location << 2 | d.Component;
-								dst[offset++] = routine->inputs[scalarSlot];
-							});
+			ASSERT(objectTy.opcode() == spv::OpTypePointer);
+			auto base = &routine->getVariable(resultId)[0];
+			auto elementTy = getType(objectTy.element);
+			auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
+			state->createPointer(resultId, SIMD::Pointer(base, size));
+			break;
 		}
-		ASSERT(objectTy.opcode() == spv::OpTypePointer);
-		auto base = &routine->getVariable(resultId)[0];
-		auto elementTy = getType(objectTy.element);
-		auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
-		state->createPointer(resultId, SIMD::Pointer(base, size));
-		break;
-	}
-	case spv::StorageClassUniformConstant:
-	{
-		const auto &d = descriptorDecorations.at(resultId);
-		ASSERT(d.DescriptorSet >= 0);
-		ASSERT(d.Binding >= 0);
+		case spv::StorageClassWorkgroup:
+		{
+			ASSERT(objectTy.opcode() == spv::OpTypePointer);
+			auto base = &routine->workgroupMemory[0];
+			auto size = workgroupMemory.size();
+			state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
+			break;
+		}
+		case spv::StorageClassInput:
+		{
+			if(object.kind == Object::Kind::InterfaceVariable)
+			{
+				auto &dst = routine->getVariable(resultId);
+				int offset = 0;
+				VisitInterface(resultId,
+				               [&](Decorations const &d, AttribType type) {
+					               auto scalarSlot = d.Location << 2 | d.Component;
+					               dst[offset++] = routine->inputs[scalarSlot];
+				               });
+			}
+			ASSERT(objectTy.opcode() == spv::OpTypePointer);
+			auto base = &routine->getVariable(resultId)[0];
+			auto elementTy = getType(objectTy.element);
+			auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
+			state->createPointer(resultId, SIMD::Pointer(base, size));
+			break;
+		}
+		case spv::StorageClassUniformConstant:
+		{
+			const auto &d = descriptorDecorations.at(resultId);
+			ASSERT(d.DescriptorSet >= 0);
+			ASSERT(d.Binding >= 0);
 
-		uint32_t arrayIndex = 0;  // TODO(b/129523279)
-		auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
-		if(setLayout->hasBinding(d.Binding))
-		{
-			uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex));
-			Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet];  // DescriptorSet*
-			Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset);    // vk::SampledImageDescriptor*
-			auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
-			state->createPointer(resultId, SIMD::Pointer(binding, size));
+			uint32_t arrayIndex = 0;  // TODO(b/129523279)
+			auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
+			if(setLayout->hasBinding(d.Binding))
+			{
+				uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex));
+				Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet];  // DescriptorSet*
+				Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset);    // vk::SampledImageDescriptor*
+				auto size = 0;                                                 // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
+				state->createPointer(resultId, SIMD::Pointer(binding, size));
+			}
+			else
+			{
+				// TODO: Error if the variable with the non-existant binding is
+				// used? Or perhaps strip these unused variable declarations as
+				// a preprocess on the SPIR-V?
+			}
+			break;
 		}
-		else
+		case spv::StorageClassUniform:
+		case spv::StorageClassStorageBuffer:
 		{
-			// TODO: Error if the variable with the non-existant binding is
-			// used? Or perhaps strip these unused variable declarations as
-			// a preprocess on the SPIR-V?
+			const auto &d = descriptorDecorations.at(resultId);
+			ASSERT(d.DescriptorSet >= 0);
+			auto size = 0;  // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
+			// Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number
+			// of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this
+			// is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it.
+			if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
+			{
+				state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
+			}
+			else
+			{
+				state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
+			}
+			break;
 		}
-		break;
-	}
-	case spv::StorageClassUniform:
-	case spv::StorageClassStorageBuffer:
-	{
-		const auto &d = descriptorDecorations.at(resultId);
-		ASSERT(d.DescriptorSet >= 0);
-		auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
-		// Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number
-		// of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this
-		// is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it.
-		if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
+		case spv::StorageClassPushConstant:
 		{
-			state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
+			state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
+			break;
 		}
-		else
-		{
-			state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
-		}
-		break;
-	}
-	case spv::StorageClassPushConstant:
-	{
-		state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
-		break;
-	}
-	default:
-		UNREACHABLE("Storage class %d", objectTy.storageClass);
-		break;
+		default:
+			UNREACHABLE("Storage class %d", objectTy.storageClass);
+			break;
 	}
 
 	if(insn.wordCount() > 4)
@@ -232,24 +229,23 @@
 		}
 		switch(objectTy.storageClass)
 		{
-		case spv::StorageClassOutput:
-		case spv::StorageClassPrivate:
-		case spv::StorageClassFunction:
-		{
-			bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
-			auto ptr = GetPointerToData(resultId, 0, state);
-			GenericValue initialValue(this, state, initializerId);
-			VisitMemoryObject(resultId, [&](const MemoryElement& el)
+			case spv::StorageClassOutput:
+			case spv::StorageClassPrivate:
+			case spv::StorageClassFunction:
 			{
-				auto p = ptr + el.offset;
-				if(interleavedByLane) { p = InterleaveByLane(p); }
-				auto robustness = OutOfBoundsBehavior::UndefinedBehavior;  // Local variables are always within bounds.
-				p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask());
-			});
-			break;
-		}
-		default:
-			ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
+				bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
+				auto ptr = GetPointerToData(resultId, 0, state);
+				GenericValue initialValue(this, state, initializerId);
+				VisitMemoryObject(resultId, [&](const MemoryElement &el) {
+					auto p = ptr + el.offset;
+					if(interleavedByLane) { p = InterleaveByLane(p); }
+					auto robustness = OutOfBoundsBehavior::UndefinedBehavior;  // Local variables are always within bounds.
+					p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask());
+				});
+				break;
+			}
+			default:
+				ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
 		}
 	}
 
@@ -271,10 +267,9 @@
 
 	std::unordered_map<uint32_t, uint32_t> srcOffsets;
 
-	VisitMemoryObject(srcPtrId, [&](const MemoryElement& el) { srcOffsets[el.index] = el.offset; });
+	VisitMemoryObject(srcPtrId, [&](const MemoryElement &el) { srcOffsets[el.index] = el.offset; });
 
-	VisitMemoryObject(dstPtrId, [&](const MemoryElement& el)
-	{
+	VisitMemoryObject(dstPtrId, [&](const MemoryElement &el) {
 		auto it = srcOffsets.find(el.index);
 		ASSERT(it != srcOffsets.end());
 		auto srcOffset = it->second;
@@ -303,7 +298,7 @@
 	return EmitResult::Continue;
 }
 
-void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, const MemoryVisitor& f) const
+void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &f) const
 {
 	ApplyDecorationsForId(&d, id);
 	auto const &type = getType(id);
@@ -316,60 +311,60 @@
 
 	switch(type.opcode())
 	{
-	case spv::OpTypePointer:
-		VisitMemoryObjectInner(type.definition.word(3), d, index, offset, f);
-		break;
-	case spv::OpTypeInt:
-	case spv::OpTypeFloat:
-	case spv::OpTypeRuntimeArray:
-		f(MemoryElement{index++, offset, type});
-		break;
-	case spv::OpTypeVector:
-	{
-		auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
-		for(auto i = 0u; i < type.definition.word(3); i++)
+		case spv::OpTypePointer:
+			VisitMemoryObjectInner(type.definition.word(3), d, index, offset, f);
+			break;
+		case spv::OpTypeInt:
+		case spv::OpTypeFloat:
+		case spv::OpTypeRuntimeArray:
+			f(MemoryElement{ index++, offset, type });
+			break;
+		case spv::OpTypeVector:
 		{
-			VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
+			auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
+			for(auto i = 0u; i < type.definition.word(3); i++)
+			{
+				VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
+			}
+			break;
 		}
-		break;
-	}
-	case spv::OpTypeMatrix:
-	{
-		auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
-		d.InsideMatrix = true;
-		for(auto i = 0u; i < type.definition.word(3); i++)
+		case spv::OpTypeMatrix:
 		{
-			ASSERT(d.HasMatrixStride);
-			VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
+			auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
+			d.InsideMatrix = true;
+			for(auto i = 0u; i < type.definition.word(3); i++)
+			{
+				ASSERT(d.HasMatrixStride);
+				VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
+			}
+			break;
 		}
-		break;
-	}
-	case spv::OpTypeStruct:
-		for(auto i = 0u; i < type.definition.wordCount() - 2; i++)
+		case spv::OpTypeStruct:
+			for(auto i = 0u; i < type.definition.wordCount() - 2; i++)
+			{
+				ApplyDecorationsForIdMember(&d, id, i);
+				VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f);
+			}
+			break;
+		case spv::OpTypeArray:
 		{
-			ApplyDecorationsForIdMember(&d, id, i);
-			VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f);
+			auto arraySize = GetConstScalarInt(type.definition.word(3));
+			for(auto i = 0u; i < arraySize; i++)
+			{
+				ASSERT(d.HasArrayStride);
+				VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
+			}
+			break;
 		}
-		break;
-	case spv::OpTypeArray:
-	{
-		auto arraySize = GetConstScalarInt(type.definition.word(3));
-		for(auto i = 0u; i < arraySize; i++)
-		{
-			ASSERT(d.HasArrayStride);
-			VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
-		}
-		break;
-	}
-	default:
-		UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
+		default:
+			UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
 	}
 }
 
-void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, const MemoryVisitor& f) const
+void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, const MemoryVisitor &f) const
 {
 	auto typeId = getObject(id).type;
-	auto const & type = getType(typeId);
+	auto const &type = getType(typeId);
 	if(IsExplicitLayout(type.storageClass))
 	{
 		Decorations d{};
@@ -384,7 +379,7 @@
 		for(auto index = 0u; index < elType.sizeInComponents; index++)
 		{
 			auto offset = static_cast<uint32_t>(index * sizeof(float));
-			f({index, offset, elType});
+			f({ index, offset, elType });
 		}
 	}
 }
@@ -411,15 +406,15 @@
 			ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding));
 			int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
 
-			Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
-			Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
+			Pointer<Byte> descriptor = set.base + bindingOffset;                                           // BufferDescriptor*
+			Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr));  // void*
 			Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
 			if(setLayout->isBindingDynamic(d.Binding))
 			{
 				uint32_t dynamicBindingIndex =
-					routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
-					setLayout->getDynamicDescriptorOffset(d.Binding) +
-					arrayIndex;
+				    routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
+				    setLayout->getDynamicDescriptorOffset(d.Binding) +
+				    arrayIndex;
 				Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex];
 				Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
 				return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
@@ -439,23 +434,22 @@
 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
 {
 	auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
-		spv::MemorySemanticsAcquireMask |
-		spv::MemorySemanticsReleaseMask |
-		spv::MemorySemanticsAcquireReleaseMask |
-		spv::MemorySemanticsSequentiallyConsistentMask
-	);
+	                                                            spv::MemorySemanticsAcquireMask |
+	                                                            spv::MemorySemanticsReleaseMask |
+	                                                            spv::MemorySemanticsAcquireReleaseMask |
+	                                                            spv::MemorySemanticsSequentiallyConsistentMask);
 	switch(control)
 	{
-	case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
-	case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
-	case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
-	case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
-	case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
-	default:
-		// "it is invalid for more than one of these four bits to be set:
-		// Acquire, Release, AcquireRelease, or SequentiallyConsistent."
-		UNREACHABLE("MemorySemanticsMask: %x", int(control));
-		return std::memory_order_acq_rel;
+		case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
+		case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
+		case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
+		case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
+		case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
+		default:
+			// "it is invalid for more than one of these four bits to be set:
+			// Acquire, Release, AcquireRelease, or SequentiallyConsistent."
+			UNREACHABLE("MemorySemanticsMask: %x", int(control));
+			return std::memory_order_acq_rel;
 	}
 }
 
@@ -463,12 +457,12 @@
 {
 	switch(storageClass)
 	{
-	case spv::StorageClassUniform:
-	case spv::StorageClassStorageBuffer:
-	case spv::StorageClassImage:
-		return false;
-	default:
-		return true;
+		case spv::StorageClassUniform:
+		case spv::StorageClassStorageBuffer:
+		case spv::StorageClassImage:
+			return false;
+		default:
+			return true;
 	}
 }
 
@@ -476,12 +470,12 @@
 {
 	switch(storageClass)
 	{
-	case spv::StorageClassUniform:
-	case spv::StorageClassStorageBuffer:
-	case spv::StorageClassPushConstant:
-		return true;
-	default:
-		return false;
+		case spv::StorageClassUniform:
+		case spv::StorageClassStorageBuffer:
+		case spv::StorageClassPushConstant:
+			return true;
+		default:
+			return false;
 	}
 }
 
@@ -499,14 +493,14 @@
 {
 	switch(storageClass)
 	{
-	case spv::StorageClassUniform:
-	case spv::StorageClassStorageBuffer:
-	case spv::StorageClassPushConstant:
-	case spv::StorageClassWorkgroup:
-	case spv::StorageClassImage:
-		return false;
-	default:
-		return true;
+		case spv::StorageClassUniform:
+		case spv::StorageClassStorageBuffer:
+		case spv::StorageClassPushConstant:
+		case spv::StorageClassWorkgroup:
+		case spv::StorageClassImage:
+			return false;
+		default:
+			return true;
 	}
 }
 
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 1d9694a..45deba6 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -14,14 +14,14 @@
 
 #include "SpirvShader.hpp"
 
-#include "SamplerCore.hpp" // TODO: Figure out what's needed.
+#include "SamplerCore.hpp"  // TODO: Figure out what's needed.
+#include "Device/Config.hpp"
 #include "System/Math.hpp"
 #include "Vulkan/VkDebug.hpp"
 #include "Vulkan/VkDescriptorSetLayout.hpp"
 #include "Vulkan/VkDevice.hpp"
 #include "Vulkan/VkImageView.hpp"
 #include "Vulkan/VkSampler.hpp"
-#include "Device/Config.hpp"
 
 #include <spirv/unified1/spirv.hpp>
 
@@ -36,22 +36,22 @@
 	const auto samplerId = sampler ? sampler->id : 0;
 	ASSERT(imageDescriptor->imageViewId != 0 && (samplerId != 0 || instruction.samplerMethod == Fetch));
 
-	vk::Device::SamplingRoutineCache::Key key = {inst, imageDescriptor->imageViewId, samplerId};
+	vk::Device::SamplingRoutineCache::Key key = { inst, imageDescriptor->imageViewId, samplerId };
 
 	ASSERT(imageDescriptor->device);
 
 	if(auto routine = imageDescriptor->device->findInConstCache(key))
 	{
-		return (ImageSampler*)(routine->getEntry());
+		return (ImageSampler *)(routine->getEntry());
 	}
 
 	std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex());
-	vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache();
+	vk::Device::SamplingRoutineCache *cache = imageDescriptor->device->getSamplingRoutineCache();
 
 	auto routine = cache->query(key);
 	if(routine)
 	{
-		return (ImageSampler*)(routine->getEntry());
+		return (ImageSampler *)(routine->getEntry());
 	}
 
 	auto type = imageDescriptor->type;
@@ -69,9 +69,9 @@
 	samplerState.swizzle = imageDescriptor->swizzle;
 	samplerState.gatherComponent = instruction.gatherComponent;
 	samplerState.highPrecisionFiltering = false;
-	samplerState.largeTexture = (imageDescriptor->extent.width  > SHRT_MAX) ||
+	samplerState.largeTexture = (imageDescriptor->extent.width > SHRT_MAX) ||
 	                            (imageDescriptor->extent.height > SHRT_MAX) ||
-	                            (imageDescriptor->extent.depth  > SHRT_MAX);
+	                            (imageDescriptor->extent.depth > SHRT_MAX);
 
 	if(sampler)
 	{
@@ -100,7 +100,7 @@
 	routine = emitSamplerRoutine(instruction, samplerState);
 
 	cache->add(key, routine);
-	return (ImageSampler*)(routine->getEntry());
+	return (ImageSampler *)(routine->getEntry());
 }
 
 std::shared_ptr<rr::Routine> SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
@@ -114,17 +114,17 @@
 		Pointer<SIMD::Float> out = function.Arg<3>();
 		Pointer<Byte> constants = function.Arg<4>();
 
-		SIMD::Float uvw[4] = {0, 0, 0, 0};
+		SIMD::Float uvw[4] = { 0, 0, 0, 0 };
 		SIMD::Float q = 0;
 		SIMD::Float lodOrBias = 0;  // Explicit level-of-detail, or bias added to the implicit level-of-detail (depending on samplerMethod).
-		Vector4f dsx = {0, 0, 0, 0};
-		Vector4f dsy = {0, 0, 0, 0};
-		Vector4f offset = {0, 0, 0, 0};
+		Vector4f dsx = { 0, 0, 0, 0 };
+		Vector4f dsy = { 0, 0, 0, 0 };
+		Vector4f offset = { 0, 0, 0, 0 };
 		SIMD::Int sampleId = 0;
 		SamplerFunction samplerFunction = instruction.getSamplerFunction();
 
 		uint32_t i = 0;
-		for( ; i < instruction.coordinates; i++)
+		for(; i < instruction.coordinates; i++)
 		{
 			uvw[i] = in[i];
 		}
@@ -231,28 +231,28 @@
 {
 	switch(sampler->magFilter)
 	{
-	case VK_FILTER_NEAREST:
-		switch(sampler->minFilter)
-		{
-		case VK_FILTER_NEAREST: return FILTER_POINT;
-		case VK_FILTER_LINEAR:  return FILTER_MIN_LINEAR_MAG_POINT;
+		case VK_FILTER_NEAREST:
+			switch(sampler->minFilter)
+			{
+				case VK_FILTER_NEAREST: return FILTER_POINT;
+				case VK_FILTER_LINEAR: return FILTER_MIN_LINEAR_MAG_POINT;
+				default:
+					UNIMPLEMENTED("minFilter %d", sampler->minFilter);
+					return FILTER_POINT;
+			}
+			break;
+		case VK_FILTER_LINEAR:
+			switch(sampler->minFilter)
+			{
+				case VK_FILTER_NEAREST: return FILTER_MIN_POINT_MAG_LINEAR;
+				case VK_FILTER_LINEAR: return FILTER_LINEAR;
+				default:
+					UNIMPLEMENTED("minFilter %d", sampler->minFilter);
+					return FILTER_POINT;
+			}
+			break;
 		default:
-			UNIMPLEMENTED("minFilter %d", sampler->minFilter);
-			return FILTER_POINT;
-		}
-		break;
-	case VK_FILTER_LINEAR:
-		switch(sampler->minFilter)
-		{
-		case VK_FILTER_NEAREST: return FILTER_MIN_POINT_MAG_LINEAR;
-		case VK_FILTER_LINEAR:  return FILTER_LINEAR;
-		default:
-			UNIMPLEMENTED("minFilter %d", sampler->minFilter);
-			return FILTER_POINT;
-		}
-		break;
-	default:
-		break;
+			break;
 	}
 
 	UNIMPLEMENTED("magFilter %d", sampler->magFilter);
@@ -273,11 +273,11 @@
 
 	switch(sampler->mipmapMode)
 	{
-	case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT;
-	case VK_SAMPLER_MIPMAP_MODE_LINEAR:  return MIPMAP_LINEAR;
-	default:
-		UNIMPLEMENTED("mipmapMode %d", sampler->mipmapMode);
-		return MIPMAP_POINT;
+		case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT;
+		case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MIPMAP_LINEAR;
+		default:
+			UNIMPLEMENTED("mipmapMode %d", sampler->mipmapMode);
+			return MIPMAP_POINT;
 	}
 }
 
@@ -285,77 +285,77 @@
 {
 	switch(imageViewType)
 	{
-	case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
-		if(coordinateIndex == 3)
-		{
-			return ADDRESSING_LAYER;
-		}
-		// Fall through to CUBE case:
-	case VK_IMAGE_VIEW_TYPE_CUBE:
-		if(coordinateIndex <= 1)  // Cube faces themselves are addressed as 2D images.
-		{
-			// Vulkan 1.1 spec:
-			// "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then
-			//  VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE is used, and if VK_FILTER_LINEAR is used within a mip level then sampling at the edges
-			//  is performed as described earlier in the Cube map edge handling section."
-			// This corresponds with our 'SEAMLESS' addressing mode.
-			return ADDRESSING_SEAMLESS;
-		}
-		else if(coordinateIndex == 2)
-		{
-			// The cube face is an index into array layers.
-			return ADDRESSING_CUBEFACE;
-		}
-		else
-		{
-			return ADDRESSING_UNUSED;
-		}
-		break;
+		case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+			if(coordinateIndex == 3)
+			{
+				return ADDRESSING_LAYER;
+			}
+			// Fall through to CUBE case:
+		case VK_IMAGE_VIEW_TYPE_CUBE:
+			if(coordinateIndex <= 1)  // Cube faces themselves are addressed as 2D images.
+			{
+				// Vulkan 1.1 spec:
+				// "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then
+				//  VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE is used, and if VK_FILTER_LINEAR is used within a mip level then sampling at the edges
+				//  is performed as described earlier in the Cube map edge handling section."
+				// This corresponds with our 'SEAMLESS' addressing mode.
+				return ADDRESSING_SEAMLESS;
+			}
+			else if(coordinateIndex == 2)
+			{
+				// The cube face is an index into array layers.
+				return ADDRESSING_CUBEFACE;
+			}
+			else
+			{
+				return ADDRESSING_UNUSED;
+			}
+			break;
 
-	case VK_IMAGE_VIEW_TYPE_1D:  // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
-		if(coordinateIndex == 1)
-		{
+		case VK_IMAGE_VIEW_TYPE_1D:  // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
+			if(coordinateIndex == 1)
+			{
+				return ADDRESSING_WRAP;
+			}
+			else if(coordinateIndex >= 2)
+			{
+				return ADDRESSING_UNUSED;
+			}
+			break;
+
+		case VK_IMAGE_VIEW_TYPE_3D:
+			if(coordinateIndex >= 3)
+			{
+				return ADDRESSING_UNUSED;
+			}
+			break;
+
+		case VK_IMAGE_VIEW_TYPE_1D_ARRAY:  // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
+			if(coordinateIndex == 1)
+			{
+				return ADDRESSING_WRAP;
+			}
+			// Fall through to 2D_ARRAY case:
+		case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+			if(coordinateIndex == 2)
+			{
+				return ADDRESSING_LAYER;
+			}
+			else if(coordinateIndex >= 3)
+			{
+				return ADDRESSING_UNUSED;
+			}
+			// Fall through to 2D case:
+		case VK_IMAGE_VIEW_TYPE_2D:
+			if(coordinateIndex >= 2)
+			{
+				return ADDRESSING_UNUSED;
+			}
+			break;
+
+		default:
+			UNIMPLEMENTED("imageViewType %d", imageViewType);
 			return ADDRESSING_WRAP;
-		}
-		else if(coordinateIndex >= 2)
-		{
-			return ADDRESSING_UNUSED;
-		}
-		break;
-
-	case VK_IMAGE_VIEW_TYPE_3D:
-		if(coordinateIndex >= 3)
-		{
-			return ADDRESSING_UNUSED;
-		}
-		break;
-
-	case VK_IMAGE_VIEW_TYPE_1D_ARRAY:  // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
-		if(coordinateIndex == 1)
-		{
-			return ADDRESSING_WRAP;
-		}
-		// Fall through to 2D_ARRAY case:
-	case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
-		if(coordinateIndex == 2)
-		{
-			return ADDRESSING_LAYER;
-		}
-		else if(coordinateIndex >= 3)
-		{
-			return ADDRESSING_UNUSED;
-		}
-		// Fall through to 2D case:
-	case VK_IMAGE_VIEW_TYPE_2D:
-		if(coordinateIndex >= 2)
-		{
-			return ADDRESSING_UNUSED;
-		}
-		break;
-
-	default:
-		UNIMPLEMENTED("imageViewType %d", imageViewType);
-		return ADDRESSING_WRAP;
 	}
 
 	if(!sampler)
@@ -373,23 +373,23 @@
 	VkSamplerAddressMode addressMode = VK_SAMPLER_ADDRESS_MODE_REPEAT;
 	switch(coordinateIndex)
 	{
-	case 0: addressMode = sampler->addressModeU; break;
-	case 1: addressMode = sampler->addressModeV; break;
-	case 2: addressMode = sampler->addressModeW; break;
-	default: UNSUPPORTED("coordinateIndex: %d", coordinateIndex);
+		case 0: addressMode = sampler->addressModeU; break;
+		case 1: addressMode = sampler->addressModeV; break;
+		case 2: addressMode = sampler->addressModeW; break;
+		default: UNSUPPORTED("coordinateIndex: %d", coordinateIndex);
 	}
 
 	switch(addressMode)
 	{
-	case VK_SAMPLER_ADDRESS_MODE_REPEAT:               return ADDRESSING_WRAP;
-	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:      return ADDRESSING_MIRROR;
-	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:        return ADDRESSING_CLAMP;
-	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:      return ADDRESSING_BORDER;
-	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return ADDRESSING_MIRRORONCE;
-	default:
-		UNIMPLEMENTED("addressMode %d", addressMode);
-		return ADDRESSING_WRAP;
+		case VK_SAMPLER_ADDRESS_MODE_REPEAT: return ADDRESSING_WRAP;
+		case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return ADDRESSING_MIRROR;
+		case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return ADDRESSING_CLAMP;
+		case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return ADDRESSING_BORDER;
+		case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return ADDRESSING_MIRRORONCE;
+		default:
+			UNIMPLEMENTED("addressMode %d", addressMode);
+			return ADDRESSING_WRAP;
 	}
 }
 
-} // namespace sw
+}  // namespace sw
diff --git a/src/Pipeline/SpirvShaderSpec.cpp b/src/Pipeline/SpirvShaderSpec.cpp
index 2d3c402..3bfd153 100644
--- a/src/Pipeline/SpirvShaderSpec.cpp
+++ b/src/Pipeline/SpirvShaderSpec.cpp
@@ -24,132 +24,132 @@
 
 	switch(opcode)
 	{
-	case spv::OpIAdd:
-	case spv::OpISub:
-	case spv::OpIMul:
-	case spv::OpUDiv:
-	case spv::OpSDiv:
-	case spv::OpUMod:
-	case spv::OpSMod:
-	case spv::OpSRem:
-	case spv::OpShiftRightLogical:
-	case spv::OpShiftRightArithmetic:
-	case spv::OpShiftLeftLogical:
-	case spv::OpBitwiseOr:
-	case spv::OpLogicalOr:
-	case spv::OpBitwiseAnd:
-	case spv::OpLogicalAnd:
-	case spv::OpBitwiseXor:
-	case spv::OpLogicalEqual:
-	case spv::OpIEqual:
-	case spv::OpLogicalNotEqual:
-	case spv::OpINotEqual:
-	case spv::OpULessThan:
-	case spv::OpSLessThan:
-	case spv::OpUGreaterThan:
-	case spv::OpSGreaterThan:
-	case spv::OpULessThanEqual:
-	case spv::OpSLessThanEqual:
-	case spv::OpUGreaterThanEqual:
-	case spv::OpSGreaterThanEqual:
-		EvalSpecConstantBinaryOp(insn);
-		break;
+		case spv::OpIAdd:
+		case spv::OpISub:
+		case spv::OpIMul:
+		case spv::OpUDiv:
+		case spv::OpSDiv:
+		case spv::OpUMod:
+		case spv::OpSMod:
+		case spv::OpSRem:
+		case spv::OpShiftRightLogical:
+		case spv::OpShiftRightArithmetic:
+		case spv::OpShiftLeftLogical:
+		case spv::OpBitwiseOr:
+		case spv::OpLogicalOr:
+		case spv::OpBitwiseAnd:
+		case spv::OpLogicalAnd:
+		case spv::OpBitwiseXor:
+		case spv::OpLogicalEqual:
+		case spv::OpIEqual:
+		case spv::OpLogicalNotEqual:
+		case spv::OpINotEqual:
+		case spv::OpULessThan:
+		case spv::OpSLessThan:
+		case spv::OpUGreaterThan:
+		case spv::OpSGreaterThan:
+		case spv::OpULessThanEqual:
+		case spv::OpSLessThanEqual:
+		case spv::OpUGreaterThanEqual:
+		case spv::OpSGreaterThanEqual:
+			EvalSpecConstantBinaryOp(insn);
+			break;
 
-	case spv::OpSConvert:
-	case spv::OpFConvert:
-	case spv::OpUConvert:
-	case spv::OpSNegate:
-	case spv::OpNot:
-	case spv::OpLogicalNot:
-	case spv::OpQuantizeToF16:
-		EvalSpecConstantUnaryOp(insn);
-		break;
+		case spv::OpSConvert:
+		case spv::OpFConvert:
+		case spv::OpUConvert:
+		case spv::OpSNegate:
+		case spv::OpNot:
+		case spv::OpLogicalNot:
+		case spv::OpQuantizeToF16:
+			EvalSpecConstantUnaryOp(insn);
+			break;
 
-	case spv::OpSelect:
-	{
-		auto &result = CreateConstant(insn);
-		auto const &cond = getObject(insn.word(4));
-		auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
-		auto const &left = getObject(insn.word(5));
-		auto const &right = getObject(insn.word(6));
-
-		for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
+		case spv::OpSelect:
 		{
-			auto sel = cond.constantValue[condIsScalar ? 0 : i];
-			result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i];
-		}
-		break;
-	}
+			auto &result = CreateConstant(insn);
+			auto const &cond = getObject(insn.word(4));
+			auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
+			auto const &left = getObject(insn.word(5));
+			auto const &right = getObject(insn.word(6));
 
-	case spv::OpCompositeExtract:
-	{
-		auto &result = CreateConstant(insn);
-		auto const &compositeObject = getObject(insn.word(4));
-		auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
-
-		for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
-		{
-			result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
-		}
-		break;
-	}
-
-	case spv::OpCompositeInsert:
-	{
-		auto &result = CreateConstant(insn);
-		auto const &newPart = getObject(insn.word(4));
-		auto const &oldObject = getObject(insn.word(5));
-		auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
-
-		// old components before
-		for(auto i = 0u; i < firstNewComponent; i++)
-		{
-			result.constantValue[i] = oldObject.constantValue[i];
-		}
-		// new part
-		for(auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
-		{
-			result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
-		}
-		// old components after
-		for(auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
-		{
-			result.constantValue[i] = oldObject.constantValue[i];
-		}
-		break;
-	}
-
-	case spv::OpVectorShuffle:
-	{
-		auto &result = CreateConstant(insn);
-		auto const &firstHalf = getObject(insn.word(4));
-		auto const &secondHalf = getObject(insn.word(5));
-
-		for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
-		{
-			auto selector = insn.word(6 + i);
-			if(selector == static_cast<uint32_t>(-1))
+			for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
 			{
-				// Undefined value, we'll use zero
-				result.constantValue[i] = 0;
+				auto sel = cond.constantValue[condIsScalar ? 0 : i];
+				result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i];
 			}
-			else if(selector < getType(firstHalf.type).sizeInComponents)
-			{
-				result.constantValue[i] = firstHalf.constantValue[selector];
-			}
-			else
-			{
-				result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
-			}
+			break;
 		}
-		break;
-	}
 
-	default:
-		// Other spec constant ops are possible, but require capabilities that are
-		// not exposed in our Vulkan implementation (eg Kernel), so we should never
-		// get here for correct shaders.
-		UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
+		case spv::OpCompositeExtract:
+		{
+			auto &result = CreateConstant(insn);
+			auto const &compositeObject = getObject(insn.word(4));
+			auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
+
+			for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
+			{
+				result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
+			}
+			break;
+		}
+
+		case spv::OpCompositeInsert:
+		{
+			auto &result = CreateConstant(insn);
+			auto const &newPart = getObject(insn.word(4));
+			auto const &oldObject = getObject(insn.word(5));
+			auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
+
+			// old components before
+			for(auto i = 0u; i < firstNewComponent; i++)
+			{
+				result.constantValue[i] = oldObject.constantValue[i];
+			}
+			// new part
+			for(auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
+			{
+				result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
+			}
+			// old components after
+			for(auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
+			{
+				result.constantValue[i] = oldObject.constantValue[i];
+			}
+			break;
+		}
+
+		case spv::OpVectorShuffle:
+		{
+			auto &result = CreateConstant(insn);
+			auto const &firstHalf = getObject(insn.word(4));
+			auto const &secondHalf = getObject(insn.word(5));
+
+			for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
+			{
+				auto selector = insn.word(6 + i);
+				if(selector == static_cast<uint32_t>(-1))
+				{
+					// Undefined value, we'll use zero
+					result.constantValue[i] = 0;
+				}
+				else if(selector < getType(firstHalf.type).sizeInComponents)
+				{
+					result.constantValue[i] = firstHalf.constantValue[selector];
+				}
+				else
+				{
+					result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
+				}
+			}
+			break;
+		}
+
+		default:
+			// Other spec constant ops are possible, but require capabilities that are
+			// not exposed in our Vulkan implementation (eg Kernel), so we should never
+			// get here for correct shaders.
+			UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
 	}
 }
 
@@ -168,37 +168,37 @@
 
 		switch(opcode)
 		{
-		case spv::OpSConvert:
-		case spv::OpFConvert:
-		case spv::OpUConvert:
-			UNREACHABLE("Not possible until we have multiple bit widths");
-			break;
+			case spv::OpSConvert:
+			case spv::OpFConvert:
+			case spv::OpUConvert:
+				UNREACHABLE("Not possible until we have multiple bit widths");
+				break;
 
-		case spv::OpSNegate:
-			v = -(int)l;
-			break;
-		case spv::OpNot:
-		case spv::OpLogicalNot:
-			v = ~l;
-			break;
+			case spv::OpSNegate:
+				v = -(int)l;
+				break;
+			case spv::OpNot:
+			case spv::OpLogicalNot:
+				v = ~l;
+				break;
 
-		case spv::OpQuantizeToF16:
-		{
-			// Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
-			auto abs = bit_cast<float>(l & 0x7FFFFFFF);
-			auto sign = l & 0x80000000;
-			auto isZero = abs < 0.000061035f ? ~0u : 0u;
-			auto isInf = abs > 65504.0f ? ~0u : 0u;
-			auto isNaN = (abs != abs) ? ~0u : 0u;
-			auto isInfOrNan = isInf | isNaN;
-			v = l & 0xFFFFE000;
-			v &= ~isZero | 0x80000000;
-			v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
-			v |= isNaN & 0x400000;
-			break;
-		}
-		default:
-			UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
+			case spv::OpQuantizeToF16:
+			{
+				// Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
+				auto abs = bit_cast<float>(l & 0x7FFFFFFF);
+				auto sign = l & 0x80000000;
+				auto isZero = abs < 0.000061035f ? ~0u : 0u;
+				auto isInf = abs > 65504.0f ? ~0u : 0u;
+				auto isNaN = (abs != abs) ? ~0u : 0u;
+				auto isInfOrNan = isInf | isNaN;
+				v = l & 0xFFFFE000;
+				v &= ~isZero | 0x80000000;
+				v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
+				v |= isNaN & 0x400000;
+				break;
+			}
+			default:
+				UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
 		}
 	}
 }
@@ -220,93 +220,93 @@
 
 		switch(opcode)
 		{
-		case spv::OpIAdd:
-			v = l + r;
-			break;
-		case spv::OpISub:
-			v = l - r;
-			break;
-		case spv::OpIMul:
-			v = l * r;
-			break;
-		case spv::OpUDiv:
-			v = (r == 0) ? 0 : l / r;
-			break;
-		case spv::OpUMod:
-			v = (r == 0) ? 0 : l % r;
-			break;
-		case spv::OpSDiv:
-			if(r == 0) r = UINT32_MAX;
-			if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
-			v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
-			break;
-		case spv::OpSRem:
-			if(r == 0) r = UINT32_MAX;
-			if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
-			v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
-			break;
-		case spv::OpSMod:
-			if(r == 0) r = UINT32_MAX;
-			if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
-			// Test if a signed-multiply would be negative.
-			v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
-			if((v & 0x80000000) != (r & 0x80000000))
-				v += r;
-			break;
-		case spv::OpShiftRightLogical:
-			v = l >> r;
-			break;
-		case spv::OpShiftRightArithmetic:
-			v = static_cast<int32_t>(l) >> r;
-			break;
-		case spv::OpShiftLeftLogical:
-			v = l << r;
-			break;
-		case spv::OpBitwiseOr:
-		case spv::OpLogicalOr:
-			v = l | r;
-			break;
-		case spv::OpBitwiseAnd:
-		case spv::OpLogicalAnd:
-			v = l & r;
-			break;
-		case spv::OpBitwiseXor:
-			v = l ^ r;
-			break;
-		case spv::OpLogicalEqual:
-		case spv::OpIEqual:
-			v = (l == r) ? ~0u : 0u;
-			break;
-		case spv::OpLogicalNotEqual:
-		case spv::OpINotEqual:
-			v = (l != r) ? ~0u : 0u;
-			break;
-		case spv::OpULessThan:
-			v = l < r ? ~0u : 0u;
-			break;
-		case spv::OpSLessThan:
-			v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
-			break;
-		case spv::OpUGreaterThan:
-			v = l > r ? ~0u : 0u;
-			break;
-		case spv::OpSGreaterThan:
-			v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
-			break;
-		case spv::OpULessThanEqual:
-			v = l <= r ? ~0u : 0u;
-			break;
-		case spv::OpSLessThanEqual:
-			v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
-			break;
-		case spv::OpUGreaterThanEqual:
-			v = l >= r ? ~0u : 0u;
-			break;
-		case spv::OpSGreaterThanEqual:
-			v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
-			break;
-		default:
-			UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
+			case spv::OpIAdd:
+				v = l + r;
+				break;
+			case spv::OpISub:
+				v = l - r;
+				break;
+			case spv::OpIMul:
+				v = l * r;
+				break;
+			case spv::OpUDiv:
+				v = (r == 0) ? 0 : l / r;
+				break;
+			case spv::OpUMod:
+				v = (r == 0) ? 0 : l % r;
+				break;
+			case spv::OpSDiv:
+				if(r == 0) r = UINT32_MAX;
+				if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
+				v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
+				break;
+			case spv::OpSRem:
+				if(r == 0) r = UINT32_MAX;
+				if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
+				v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
+				break;
+			case spv::OpSMod:
+				if(r == 0) r = UINT32_MAX;
+				if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
+				// Test if a signed-multiply would be negative.
+				v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
+				if((v & 0x80000000) != (r & 0x80000000))
+					v += r;
+				break;
+			case spv::OpShiftRightLogical:
+				v = l >> r;
+				break;
+			case spv::OpShiftRightArithmetic:
+				v = static_cast<int32_t>(l) >> r;
+				break;
+			case spv::OpShiftLeftLogical:
+				v = l << r;
+				break;
+			case spv::OpBitwiseOr:
+			case spv::OpLogicalOr:
+				v = l | r;
+				break;
+			case spv::OpBitwiseAnd:
+			case spv::OpLogicalAnd:
+				v = l & r;
+				break;
+			case spv::OpBitwiseXor:
+				v = l ^ r;
+				break;
+			case spv::OpLogicalEqual:
+			case spv::OpIEqual:
+				v = (l == r) ? ~0u : 0u;
+				break;
+			case spv::OpLogicalNotEqual:
+			case spv::OpINotEqual:
+				v = (l != r) ? ~0u : 0u;
+				break;
+			case spv::OpULessThan:
+				v = l < r ? ~0u : 0u;
+				break;
+			case spv::OpSLessThan:
+				v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
+				break;
+			case spv::OpUGreaterThan:
+				v = l > r ? ~0u : 0u;
+				break;
+			case spv::OpSGreaterThan:
+				v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
+				break;
+			case spv::OpULessThanEqual:
+				v = l <= r ? ~0u : 0u;
+				break;
+			case spv::OpSLessThanEqual:
+				v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
+				break;
+			case spv::OpUGreaterThanEqual:
+				v = l >= r ? ~0u : 0u;
+				break;
+			case spv::OpSGreaterThanEqual:
+				v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
+				break;
+			default:
+				UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
 		}
 	}
 }
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index b2a731a..615bcf3 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -25,30 +25,27 @@
 namespace sw {
 
 VertexProgram::VertexProgram(
-		const VertexProcessor::State &state,
-		vk::PipelineLayout const *pipelineLayout,
-		SpirvShader const *spirvShader,
-		const vk::DescriptorSet::Bindings &descriptorSets)
-	: VertexRoutine(state, pipelineLayout, spirvShader),
-	  descriptorSets(descriptorSets)
+    const VertexProcessor::State &state,
+    vk::PipelineLayout const *pipelineLayout,
+    SpirvShader const *spirvShader,
+    const vk::DescriptorSet::Bindings &descriptorSets)
+    : VertexRoutine(state, pipelineLayout, spirvShader)
+    , descriptorSets(descriptorSets)
 {
 	routine.setImmutableInputBuiltins(spirvShader);
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		assert(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID)))));
 	});
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInInstanceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInInstanceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		// TODO: we could do better here; we know InstanceIndex is uniform across all lanes
 		assert(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID)))));
 	});
 
-	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
-	{
+	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
 	});
@@ -63,7 +60,7 @@
 {
 }
 
-void VertexProgram::program(Pointer<UInt> &batch, UInt& vertexCount)
+void VertexProgram::program(Pointer<UInt> &batch, UInt &vertexCount)
 {
 	auto it = spirvShader->inputBuiltins.find(spv::BuiltInVertexIndex);
 	if(it != spirvShader->inputBuiltins.end())
@@ -71,8 +68,8 @@
 		assert(it->second.SizeInComponents == 1);
 
 		routine.getVariable(it->second.Id)[it->second.FirstComponent] =
-				As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) +
-				           Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex))));
+		    As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) +
+		               Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex))));
 	}
 
 	auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
@@ -82,4 +79,4 @@
 	spirvShader->emitEpilog(&routine);
 }
 
-}  // namepsace sw
+}  // namespace sw
diff --git a/src/Pipeline/VertexProgram.hpp b/src/Pipeline/VertexProgram.hpp
index 9a14713..6abb727 100644
--- a/src/Pipeline/VertexProgram.hpp
+++ b/src/Pipeline/VertexProgram.hpp
@@ -15,8 +15,8 @@
 #ifndef sw_VertexProgram_hpp
 #define sw_VertexProgram_hpp
 
-#include "VertexRoutine.hpp"
 #include "ShaderCore.hpp"
+#include "VertexRoutine.hpp"
 
 namespace sw {
 
@@ -26,19 +26,19 @@
 {
 public:
 	VertexProgram(
-		const VertexProcessor::State &state,
-		vk::PipelineLayout const *pipelineLayout,
-		SpirvShader const *spirvShader,
-		const vk::DescriptorSet::Bindings &descriptorSets);
+	    const VertexProcessor::State &state,
+	    vk::PipelineLayout const *pipelineLayout,
+	    SpirvShader const *spirvShader,
+	    const vk::DescriptorSet::Bindings &descriptorSets);
 
 	virtual ~VertexProgram();
 
 private:
-	void program(Pointer<UInt> &batch, UInt& vertexCount) override;
+	void program(Pointer<UInt> &batch, UInt &vertexCount) override;
 
 	const vk::DescriptorSet::Bindings &descriptorSets;
 };
 
-}  // namepsace sw
+}  // namespace sw
 
-#endif   // sw_VertexProgram_hpp
+#endif  // sw_VertexProgram_hpp
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index 85c395a..517ac70 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -16,20 +16,20 @@
 
 #include "Constants.hpp"
 #include "SpirvShader.hpp"
-#include "Device/Vertex.hpp"
 #include "Device/Renderer.hpp"
-#include "Vulkan/VkDebug.hpp"
+#include "Device/Vertex.hpp"
 #include "System/Half.hpp"
+#include "Vulkan/VkDebug.hpp"
 
 namespace sw {
 
 VertexRoutine::VertexRoutine(
-		const VertexProcessor::State &state,
-		vk::PipelineLayout const *pipelineLayout,
-		SpirvShader const *spirvShader)
-	: routine(pipelineLayout),
-	  state(state),
-	  spirvShader(spirvShader)
+    const VertexProcessor::State &state,
+    vk::PipelineLayout const *pipelineLayout,
+    SpirvShader const *spirvShader)
+    : routine(pipelineLayout)
+    , state(state)
+    , spirvShader(spirvShader)
 {
 	spirvShader->emitProlog(&routine);
 }
@@ -40,13 +40,13 @@
 
 void VertexRoutine::generate()
 {
-	Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
-	Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
-	Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag));
+	Pointer<Byte> cache = task + OFFSET(VertexTask, vertexCache);
+	Pointer<Byte> vertexCache = cache + OFFSET(VertexCache, vertex);
+	Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache, tag));
 
-	UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
+	UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask, vertexCount));
 
-	constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
+	constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants));
 
 	// Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer.
 	// On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache
@@ -93,7 +93,7 @@
 		   spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
 		   spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
 		{
-			Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4));
+			Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i / 4));
 			UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
 			Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
 			UInt robustnessSize(0);
@@ -129,19 +129,19 @@
 	Int4 minY = CmpNLE(-posW, posY);
 	Int4 minZ = CmpNLE(Float4(0.0f), posZ);
 
-	clipFlags =  Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)];
-	clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)];
-	clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)];
-	clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)];
-	clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)];
-	clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)];
+	clipFlags = Pointer<Int>(constants + OFFSET(Constants, maxX))[SignMask(maxX)];
+	clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxY))[SignMask(maxY)];
+	clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxZ))[SignMask(maxZ)];
+	clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minX))[SignMask(minX)];
+	clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minY))[SignMask(minY)];
+	clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minZ))[SignMask(minZ)];
 
-	Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
-	Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
-	Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+	Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
+	Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
+	Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
 
 	Int4 finiteXYZ = finiteX & finiteY & finiteZ;
-	clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
+	clipFlags |= Pointer<Int>(constants + OFFSET(Constants, fini))[SignMask(finiteXYZ)];
 }
 
 void VertexRoutine::computeCullMask()
@@ -162,7 +162,7 @@
 }
 
 Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
-                                   bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
+                                   bool robustBufferAccess, UInt &robustnessSize, Int baseVertex)
 {
 	Vector4f v;
 	// Because of the following rule in the Vulkan spec, we do not care if a very large negative
@@ -193,7 +193,7 @@
 
 	switch(stream.type)
 	{
-	case STREAMTYPE_FLOAT:
+		case STREAMTYPE_FLOAT:
 		{
 			if(stream.count == 0)
 			{
@@ -220,86 +220,86 @@
 
 				switch(stream.attribType)
 				{
-				case SpirvShader::ATTRIBTYPE_INT:
-					if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
-					if(stream.count >= 2) v.x = As<Float4>(Int4(v.y));
-					if(stream.count >= 3) v.x = As<Float4>(Int4(v.z));
-					if(stream.count >= 4) v.x = As<Float4>(Int4(v.w));
-					break;
-				case SpirvShader::ATTRIBTYPE_UINT:
-					if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
-					if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y));
-					if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z));
-					if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w));
-					break;
-				default:
-					break;
+					case SpirvShader::ATTRIBTYPE_INT:
+						if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
+						if(stream.count >= 2) v.y = As<Float4>(Int4(v.y));
+						if(stream.count >= 3) v.z = As<Float4>(Int4(v.z));
+						if(stream.count >= 4) v.w = As<Float4>(Int4(v.w));
+						break;
+					case SpirvShader::ATTRIBTYPE_UINT:
+						if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
+						if(stream.count >= 2) v.y = As<Float4>(UInt4(v.y));
+						if(stream.count >= 3) v.z = As<Float4>(UInt4(v.z));
+						if(stream.count >= 4) v.w = As<Float4>(UInt4(v.w));
+						break;
+					default:
+						break;
 				}
 			}
 		}
 		break;
-	case STREAMTYPE_BYTE:
-		if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
-		{
-			v.x = Float4(*Pointer<Byte4>(source0));
-			v.y = Float4(*Pointer<Byte4>(source1));
-			v.z = Float4(*Pointer<Byte4>(source2));
-			v.w = Float4(*Pointer<Byte4>(source3));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
-			if(stream.normalized)
+		case STREAMTYPE_BYTE:
+			if(isNativeFloatAttrib)  // Stream: UByte, Shader attrib: Float
 			{
-				if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
-				if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
-				if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
-				if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
+				v.x = Float4(*Pointer<Byte4>(source0));
+				v.y = Float4(*Pointer<Byte4>(source1));
+				v.z = Float4(*Pointer<Byte4>(source2));
+				v.w = Float4(*Pointer<Byte4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+				if(stream.normalized)
+				{
+					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+				}
 			}
-		}
-		else // Stream: UByte, Shader attrib: Int / UInt
-		{
-			v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
-			v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
-			v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
-			v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-		}
-		break;
-	case STREAMTYPE_SBYTE:
-		if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
-		{
-			v.x = Float4(*Pointer<SByte4>(source0));
-			v.y = Float4(*Pointer<SByte4>(source1));
-			v.z = Float4(*Pointer<SByte4>(source2));
-			v.w = Float4(*Pointer<SByte4>(source3));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
-			if(stream.normalized)
+			else  // Stream: UByte, Shader attrib: Int / UInt
 			{
-				if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
-				if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
-				if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
-				if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
-			}
-		}
-		else // Stream: SByte, Shader attrib: Int / UInt
-		{
-			v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
-			v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
-			v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
-			v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
+				v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
 
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-		}
-		break;
-	case STREAMTYPE_COLOR:
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
+			break;
+		case STREAMTYPE_SBYTE:
+			if(isNativeFloatAttrib)  // Stream: SByte, Shader attrib: Float
+			{
+				v.x = Float4(*Pointer<SByte4>(source0));
+				v.y = Float4(*Pointer<SByte4>(source1));
+				v.z = Float4(*Pointer<SByte4>(source2));
+				v.w = Float4(*Pointer<SByte4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+				if(stream.normalized)
+				{
+					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+				}
+			}
+			else  // Stream: SByte, Shader attrib: Int / UInt
+			{
+				v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
+			break;
+		case STREAMTYPE_COLOR:
 		{
-			v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
-			v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
-			v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
-			v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
+			v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+			v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+			v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+			v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
 
 			transpose4x4(v.x, v.y, v.z, v.w);
 
@@ -309,119 +309,119 @@
 			v.z = t;
 		}
 		break;
-	case STREAMTYPE_SHORT:
-		if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
-		{
-			v.x = Float4(*Pointer<Short4>(source0));
-			v.y = Float4(*Pointer<Short4>(source1));
-			v.z = Float4(*Pointer<Short4>(source2));
-			v.w = Float4(*Pointer<Short4>(source3));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
-			if(stream.normalized)
+		case STREAMTYPE_SHORT:
+			if(isNativeFloatAttrib)  // Stream: Short, Shader attrib: Float
 			{
-				if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
-				if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
-				if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
-				if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
+				v.x = Float4(*Pointer<Short4>(source0));
+				v.y = Float4(*Pointer<Short4>(source1));
+				v.z = Float4(*Pointer<Short4>(source2));
+				v.w = Float4(*Pointer<Short4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+				if(stream.normalized)
+				{
+					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+				}
 			}
-		}
-		else // Stream: Short, Shader attrib: Int/UInt, no type conversion
-		{
-			v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
-			v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
-			v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
-			v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-		}
-		break;
-	case STREAMTYPE_USHORT:
-		if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
-		{
-			v.x = Float4(*Pointer<UShort4>(source0));
-			v.y = Float4(*Pointer<UShort4>(source1));
-			v.z = Float4(*Pointer<UShort4>(source2));
-			v.w = Float4(*Pointer<UShort4>(source3));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
-			if(stream.normalized)
+			else  // Stream: Short, Shader attrib: Int/UInt, no type conversion
 			{
-				if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
-				if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
-				if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
-				if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
+				v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
 			}
-		}
-		else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
-		{
-			v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
-			v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
-			v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
-			v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-		}
-		break;
-	case STREAMTYPE_INT:
-		if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
-		{
-			v.x = Float4(*Pointer<Int4>(source0));
-			v.y = Float4(*Pointer<Int4>(source1));
-			v.z = Float4(*Pointer<Int4>(source2));
-			v.w = Float4(*Pointer<Int4>(source3));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
-			if(stream.normalized)
+			break;
+		case STREAMTYPE_USHORT:
+			if(isNativeFloatAttrib)  // Stream: UShort, Shader attrib: Float
 			{
-				if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
-				if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
-				if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
-				if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+				v.x = Float4(*Pointer<UShort4>(source0));
+				v.y = Float4(*Pointer<UShort4>(source1));
+				v.z = Float4(*Pointer<UShort4>(source2));
+				v.w = Float4(*Pointer<UShort4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+				if(stream.normalized)
+				{
+					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+				}
 			}
-		}
-		else // Stream: Int, Shader attrib: Int/UInt, no type conversion
-		{
-			v.x = *Pointer<Float4>(source0);
-			v.y = *Pointer<Float4>(source1);
-			v.z = *Pointer<Float4>(source2);
-			v.w = *Pointer<Float4>(source3);
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-		}
-		break;
-	case STREAMTYPE_UINT:
-		if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
-		{
-			v.x = Float4(*Pointer<UInt4>(source0));
-			v.y = Float4(*Pointer<UInt4>(source1));
-			v.z = Float4(*Pointer<UInt4>(source2));
-			v.w = Float4(*Pointer<UInt4>(source3));
-
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
-			if(stream.normalized)
+			else  // Stream: UShort, Shader attrib: Int/UInt, no type conversion
 			{
-				if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-				if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-				if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-				if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-			}
-		}
-		else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
-		{
-			v.x = *Pointer<Float4>(source0);
-			v.y = *Pointer<Float4>(source1);
-			v.z = *Pointer<Float4>(source2);
-			v.w = *Pointer<Float4>(source3);
+				v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
 
-			transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-		}
-		break;
-	case STREAMTYPE_HALF:
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
+			break;
+		case STREAMTYPE_INT:
+			if(isNativeFloatAttrib)  // Stream: Int, Shader attrib: Float
+			{
+				v.x = Float4(*Pointer<Int4>(source0));
+				v.y = Float4(*Pointer<Int4>(source1));
+				v.z = Float4(*Pointer<Int4>(source2));
+				v.w = Float4(*Pointer<Int4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+				if(stream.normalized)
+				{
+					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+				}
+			}
+			else  // Stream: Int, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = *Pointer<Float4>(source0);
+				v.y = *Pointer<Float4>(source1);
+				v.z = *Pointer<Float4>(source2);
+				v.w = *Pointer<Float4>(source3);
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
+			break;
+		case STREAMTYPE_UINT:
+			if(isNativeFloatAttrib)  // Stream: UInt, Shader attrib: Float
+			{
+				v.x = Float4(*Pointer<UInt4>(source0));
+				v.y = Float4(*Pointer<UInt4>(source1));
+				v.z = Float4(*Pointer<UInt4>(source2));
+				v.w = Float4(*Pointer<UInt4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+				if(stream.normalized)
+				{
+					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+				}
+			}
+			else  // Stream: UInt, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = *Pointer<Float4>(source0);
+				v.y = *Pointer<Float4>(source1);
+				v.z = *Pointer<Float4>(source2);
+				v.w = *Pointer<Float4>(source3);
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
+			break;
+		case STREAMTYPE_HALF:
 		{
 			if(stream.count >= 1)
 			{
@@ -430,10 +430,10 @@
 				UShort x2 = *Pointer<UShort>(source2 + 0);
 				UShort x3 = *Pointer<UShort>(source3 + 0);
 
-				v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4);
-				v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4);
-				v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4);
-				v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4);
+				v.x.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x0) * 4);
+				v.x.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x1) * 4);
+				v.x.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x2) * 4);
+				v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4);
 			}
 
 			if(stream.count >= 2)
@@ -443,10 +443,10 @@
 				UShort y2 = *Pointer<UShort>(source2 + 2);
 				UShort y3 = *Pointer<UShort>(source3 + 2);
 
-				v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4);
-				v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4);
-				v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4);
-				v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4);
+				v.y.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y0) * 4);
+				v.y.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y1) * 4);
+				v.y.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y2) * 4);
+				v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4);
 			}
 
 			if(stream.count >= 3)
@@ -456,10 +456,10 @@
 				UShort z2 = *Pointer<UShort>(source2 + 4);
 				UShort z3 = *Pointer<UShort>(source3 + 4);
 
-				v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4);
-				v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4);
-				v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4);
-				v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4);
+				v.z.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z0) * 4);
+				v.z.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z1) * 4);
+				v.z.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z2) * 4);
+				v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4);
 			}
 
 			if(stream.count >= 4)
@@ -469,14 +469,14 @@
 				UShort w2 = *Pointer<UShort>(source2 + 6);
 				UShort w3 = *Pointer<UShort>(source3 + 6);
 
-				v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4);
-				v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4);
-				v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4);
-				v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4);
+				v.w.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w0) * 4);
+				v.w.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w1) * 4);
+				v.w.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w2) * 4);
+				v.w.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w3) * 4);
 			}
 		}
 		break;
-	case STREAMTYPE_2_10_10_10_INT:
+		case STREAMTYPE_2_10_10_10_INT:
 		{
 			Int4 src;
 			src = Insert(src, *Pointer<Int>(source0), 0);
@@ -498,7 +498,7 @@
 			}
 		}
 		break;
-	case STREAMTYPE_2_10_10_10_UINT:
+		case STREAMTYPE_2_10_10_10_UINT:
 		{
 			Int4 src;
 			src = Insert(src, *Pointer<Int>(source0), 0);
@@ -520,8 +520,8 @@
 			}
 		}
 		break;
-	default:
-		UNSUPPORTED("stream.type %d", int(stream.type));
+		default:
+			UNSUPPORTED("stream.type %d", int(stream.type));
 	}
 
 	if(stream.count < 1) v.x = Float4(0.0f);
@@ -567,17 +567,17 @@
 	Float4 rhw = Float4(1.0f) / w;
 
 	Vector4f proj;
-	proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF))));
-	proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF))));
+	proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF))));
+	proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData, HxF))));
 	proj.z = pos.z * rhw;
 	proj.w = rhw;
 
 	transpose4x4(pos.x, pos.y, pos.z, pos.w);
 
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,position), 16) = pos.w;
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,position), 16) = pos.z;
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,position), 16) = pos.y;
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,position), 16) = pos.x;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, position), 16) = pos.w;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, position), 16) = pos.z;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, position), 16) = pos.y;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, position), 16) = pos.x;
 
 	it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
 	if(it != spirvShader->outputBuiltins.end())
@@ -585,10 +585,10 @@
 		ASSERT(it->second.SizeInComponents == 1);
 		auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
 
-		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3);
-		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, 2);
-		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, 1);
-		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
+		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, pointSize)) = Extract(psize, 3);
+		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, pointSize)) = Extract(psize, 2);
+		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, pointSize)) = Extract(psize, 1);
+		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, pointSize)) = Extract(psize, 0);
 	}
 
 	it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
@@ -598,10 +598,10 @@
 		for(unsigned int i = 0; i < count; i++)
 		{
 			auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 3);
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 2);
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 1);
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 0);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 3);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 2);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 1);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 0);
 		}
 	}
 
@@ -612,29 +612,29 @@
 		for(unsigned int i = 0; i < count; i++)
 		{
 			auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 3);
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 2);
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 1);
-			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 0);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 3);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 2);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 1);
+			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 0);
 		}
 	}
 
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8)  & 0x0000000FF;
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0)  & 0x0000000FF;
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 8) & 0x0000000FF;
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 0) & 0x0000000FF;
 
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullMask)) = -((cullMask >> 3) & 1);
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullMask)) = -((cullMask >> 2) & 1);
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullMask)) = -((cullMask >> 1) & 1);
-	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullMask)) = -((cullMask >> 0) & 1);
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullMask)) = -((cullMask >> 3) & 1);
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullMask)) = -((cullMask >> 2) & 1);
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullMask)) = -((cullMask >> 1) & 1);
+	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullMask)) = -((cullMask >> 0) & 1);
 
 	transpose4x4(proj.x, proj.y, proj.z, proj.w);
 
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w;
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,projected), 16) = proj.z;
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,projected), 16) = proj.y;
-	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,projected), 16) = proj.x;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, projected), 16) = proj.w;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, projected), 16) = proj.z;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, projected), 16) = proj.y;
+	*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, projected), 16) = proj.x;
 
 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
 	{
@@ -651,22 +651,22 @@
 
 			transpose4x4(v.x, v.y, v.z, v.w);
 
-			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,v[i]), 16) = v.w;
-			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,v[i]), 16) = v.z;
-			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,v[i]), 16) = v.y;
-			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,v[i]), 16) = v.x;
+			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, v[i]), 16) = v.w;
+			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, v[i]), 16) = v.z;
+			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, v[i]), 16) = v.y;
+			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, v[i]), 16) = v.x;
 		}
 	}
 }
 
 void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
 {
-	*Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position));
-	*Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize));
+	*Pointer<Int4>(vertex + OFFSET(Vertex, position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, position));
+	*Pointer<Int>(vertex + OFFSET(Vertex, pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, pointSize));
 
-	*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags));
-	*Pointer<Int>(vertex + OFFSET(Vertex,cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,cullMask));
-	*Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected));
+	*Pointer<Int>(vertex + OFFSET(Vertex, clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, clipFlags));
+	*Pointer<Int>(vertex + OFFSET(Vertex, cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, cullMask));
+	*Pointer<Int4>(vertex + OFFSET(Vertex, projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, projected));
 
 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
 	{
diff --git a/src/Pipeline/VertexRoutine.hpp b/src/Pipeline/VertexRoutine.hpp
index ab458b4..c490cd7 100644
--- a/src/Pipeline/VertexRoutine.hpp
+++ b/src/Pipeline/VertexRoutine.hpp
@@ -20,14 +20,21 @@
 #include "Device/Color.hpp"
 #include "Device/VertexProcessor.hpp"
 
-namespace vk { class PipelineLayout; }
+namespace vk {
+class PipelineLayout;
+}
 
 namespace sw {
 
 class VertexRoutinePrototype : public VertexRoutineFunction
 {
 public:
-	VertexRoutinePrototype() : vertex(Arg<0>()), batch(Arg<1>()), task(Arg<2>()), data(Arg<3>()) {}
+	VertexRoutinePrototype()
+	    : vertex(Arg<0>())
+	    , batch(Arg<1>())
+	    , task(Arg<2>())
+	    , data(Arg<3>())
+	{}
 	virtual ~VertexRoutinePrototype() {}
 
 protected:
@@ -41,9 +48,9 @@
 {
 public:
 	VertexRoutine(
-		const VertexProcessor::State &state,
-		vk::PipelineLayout const *pipelineLayout,
-		SpirvShader const *spirvShader);
+	    const VertexProcessor::State &state,
+	    vk::PipelineLayout const *pipelineLayout,
+	    SpirvShader const *spirvShader);
 	virtual ~VertexRoutine();
 
 	void generate();
@@ -57,15 +64,15 @@
 	SpirvRoutine routine;
 
 	const VertexProcessor::State &state;
-	SpirvShader const * const spirvShader;
+	SpirvShader const *const spirvShader;
 
 private:
-	virtual void program(Pointer<UInt> &batch, UInt& vertexCount) = 0;
+	virtual void program(Pointer<UInt> &batch, UInt &vertexCount) = 0;
 
 	typedef VertexProcessor::State::Input Stream;
 
 	Vector4f readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
-	                    bool robustBufferAccess, UInt& robustnessSize, Int baseVertex);
+	                    bool robustBufferAccess, UInt &robustnessSize, Int baseVertex);
 	void readInput(Pointer<UInt> &batch);
 	void computeClipFlags();
 	void computeCullMask();
@@ -75,4 +82,4 @@
 
 }  // namespace sw
 
-#endif   // sw_VertexRoutine_hpp
+#endif  // sw_VertexRoutine_hpp