// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "SpirvShader.hpp"
#include "System/Types.hpp"
#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
#include <spirv/unified1/spirv.hpp>
namespace sw {
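// Maps a SPIR-V image format to its Vulkan equivalent. Unlisted formats are
// reported as unsupported.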
static vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
{
switch(format)
{
case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;
default:
UNSUPPORTED("SPIR-V ImageFormat %u", format);
return VK_FORMAT_UNDEFINED;
}
}
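// Decodes an image instruction into a compact signature: the operand IDs for the
// image, sampler, coordinates, and texel (for writes), the image's dimensionality,
// arrayedness, and format, plus any optional image operands (bias, lod, gradients,
// offset, and sample index).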
EmitState::ImageInstruction::ImageInstruction(InsnIterator insn, const SpirvShader &shader, const EmitState &state)
: ImageInstructionSignature(parseVariantAndMethod(insn))
, position(insn.distanceFrom(shader.begin()))
{
if(samplerMethod == Write)
{
imageId = insn.word(1);
coordinateId = insn.word(2);
texelId = insn.word(3);
}
else
{
resultTypeId = insn.resultTypeId(); // word(1)
resultId = insn.resultId(); // word(2)
if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer) // Samplerless
{
imageId = insn.word(3);
}
else
{
// sampledImageId is either the result of an OpSampledImage instruction or
// an externally combined sampler and image.
Object::ID sampledImageId = insn.word(3);
if(state.isSampledImage(sampledImageId)) // Result of an OpSampledImage instruction
{
const SampledImagePointer &sampledImage = state.getSampledImage(sampledImageId);
imageId = shader.getObject(sampledImageId).definition.word(3);
samplerId = sampledImage.samplerId;
}
else // Combined image/sampler
{
imageId = sampledImageId;
samplerId = sampledImageId;
}
}
coordinateId = insn.word(4);
}
// `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
// To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
const Type &imageObjectType = shader.getObjectType(imageId);
const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
? shader.getType(imageObjectType.definition.word(2))
: imageObjectType;
const Type &imageType = ((imageReferenceType.opcode() == spv::OpTypePointer)
? shader.getType(imageReferenceType.element)
: imageReferenceType);
ASSERT(imageType.opcode() == spv::OpTypeImage);
dim = imageType.definition.word(3);
arrayed = imageType.definition.word(5);
imageFormat = imageType.definition.word(8);
const Object &coordinateObject = shader.getObject(coordinateId);
const Type &coordinateType = shader.getType(coordinateObject);
coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);
if(samplerMethod == TexelPointer)
{
sampleId = insn.word(5);
sample = !shader.getObject(sampleId).isConstantZero();
}
if(isDref())
{
drefId = insn.word(5);
}
if(samplerMethod == Gather)
{
gatherComponent = !isDref() ? shader.getObject(insn.word(5)).constantValue[0] : 0;
}
uint32_t operandsIndex = getImageOperandsIndex(insn);
uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0; // The mask which indicates which operands are provided.
operandsIndex += 1; // Advance to the first actual operand <id> location.
if(imageOperands & spv::ImageOperandsBiasMask)
{
ASSERT(samplerMethod == Bias);
lodOrBiasId = insn.word(operandsIndex);
operandsIndex += 1;
imageOperands &= ~spv::ImageOperandsBiasMask;
}
if(imageOperands & spv::ImageOperandsLodMask)
{
ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
lodOrBiasId = insn.word(operandsIndex);
operandsIndex += 1;
imageOperands &= ~spv::ImageOperandsLodMask;
}
if(imageOperands & spv::ImageOperandsGradMask)
{
ASSERT(samplerMethod == Grad);
gradDxId = insn.word(operandsIndex + 0);
gradDyId = insn.word(operandsIndex + 1);
operandsIndex += 2;
imageOperands &= ~spv::ImageOperandsGradMask;
grad = shader.getObjectType(gradDxId).componentCount;
}
if(imageOperands & spv::ImageOperandsConstOffsetMask)
{
offsetId = insn.word(operandsIndex);
operandsIndex += 1;
imageOperands &= ~spv::ImageOperandsConstOffsetMask;
offset = shader.getObjectType(offsetId).componentCount;
}
if(imageOperands & spv::ImageOperandsSampleMask)
{
ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
sampleId = insn.word(operandsIndex);
operandsIndex += 1;
imageOperands &= ~spv::ImageOperandsSampleMask;
sample = !shader.getObject(sampleId).isConstantZero();
}
// TODO(b/174475384)
if(imageOperands & spv::ImageOperandsZeroExtendMask)
{
ASSERT(samplerMethod == Read || samplerMethod == Write);
imageOperands &= ~spv::ImageOperandsZeroExtendMask;
}
else if(imageOperands & spv::ImageOperandsSignExtendMask)
{
ASSERT(samplerMethod == Read || samplerMethod == Write);
imageOperands &= ~spv::ImageOperandsSignExtendMask;
}
[[maybe_unused]] spv::Scope scope = spv::ScopeCrossDevice; // "Whilst the CrossDevice scope is defined in SPIR-V, it is disallowed in Vulkan."
if(imageOperands & spv::ImageOperandsMakeTexelAvailableMask)
{
scope = static_cast<spv::Scope>(insn.word(operandsIndex));
operandsIndex += 1;
imageOperands &= ~spv::ImageOperandsMakeTexelAvailableMask;
}
if(imageOperands & spv::ImageOperandsMakeTexelVisibleMask)
{
scope = static_cast<spv::Scope>(insn.word(operandsIndex));
operandsIndex += 1;
imageOperands &= ~spv::ImageOperandsMakeTexelVisibleMask;
}
if(imageOperands & spv::ImageOperandsNonPrivateTexelMask)
{
imageOperands &= ~spv::ImageOperandsNonPrivateTexelMask;
}
if(imageOperands & spv::ImageOperandsVolatileTexelMask)
{
UNIMPLEMENTED("b/176819536");
imageOperands &= ~spv::ImageOperandsVolatileTexelMask;
}
if(imageOperands & spv::ImageOperandsNontemporalMask)
{
// Hints that the accessed texels are not likely
// to be accessed again in the near future.
imageOperands &= ~spv::ImageOperandsNontemporalMask;
}
// There should be no remaining image operands.
if(imageOperands != 0)
{
UNSUPPORTED("Image operands 0x%08X", imageOperands);
}
}
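// Determines the sampler variant (None, Dref, Proj, or ProjDref) and sampling
// method from the instruction's opcode. For implicit- and explicit-lod sampling
// instructions, the presence of a Bias or Grad image operand further refines the
// method.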
EmitState::ImageInstructionSignature EmitState::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
{
uint32_t imageOperands = getImageOperandsMask(insn);
bool bias = imageOperands & spv::ImageOperandsBiasMask;
bool grad = imageOperands & spv::ImageOperandsGradMask;
switch(insn.opcode())
{
case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
case spv::OpImageGather: return { None, Gather };
case spv::OpImageDrefGather: return { Dref, Gather };
case spv::OpImageFetch: return { None, Fetch };
case spv::OpImageQueryLod: return { None, Query };
case spv::OpImageRead: return { None, Read };
case spv::OpImageWrite: return { None, Write };
case spv::OpImageTexelPointer: return { None, TexelPointer };
default:
ASSERT(false);
return { None, Implicit };
}
}
// Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
uint32_t EmitState::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
{
switch(insn.opcode())
{
case spv::OpImageSampleImplicitLod:
case spv::OpImageSampleProjImplicitLod:
return insn.wordCount() > 5 ? 5 : 0; // Optional
case spv::OpImageSampleExplicitLod:
case spv::OpImageSampleProjExplicitLod:
return 5; // "Either Lod or Grad image operands must be present."
case spv::OpImageSampleDrefImplicitLod:
case spv::OpImageSampleProjDrefImplicitLod:
return insn.wordCount() > 6 ? 6 : 0; // Optional
case spv::OpImageSampleDrefExplicitLod:
case spv::OpImageSampleProjDrefExplicitLod:
return 6; // "Either Lod or Grad image operands must be present."
case spv::OpImageGather:
case spv::OpImageDrefGather:
return insn.wordCount() > 6 ? 6 : 0; // Optional
case spv::OpImageFetch:
return insn.wordCount() > 5 ? 5 : 0; // Optional
case spv::OpImageQueryLod:
ASSERT(insn.wordCount() == 5);
return 0; // No image operands.
case spv::OpImageRead:
return insn.wordCount() > 5 ? 5 : 0; // Optional
case spv::OpImageWrite:
return insn.wordCount() > 4 ? 4 : 0; // Optional
case spv::OpImageTexelPointer:
ASSERT(insn.wordCount() == 6);
return 0; // No image operands.
default:
ASSERT(false);
return 0;
}
}
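// Returns the Image Operands mask, or 0 if the instruction has no image operands.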
uint32_t EmitState::ImageInstruction::getImageOperandsMask(InsnIterator insn)
{
uint32_t operandsIndex = getImageOperandsIndex(insn);
return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
}
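// Emits a sampling operation, skipping the call to the sampling routine entirely
// when no lanes are active.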
EmitState::EmitResult EmitState::EmitImageSample(const ImageInstruction &instruction)
{
auto &resultType = shader.getType(instruction.resultTypeId);
auto &result = createIntermediate(instruction.resultId, resultType.componentCount);
Array<SIMD::Float> out(4);
// TODO(b/153380916): When we're in a code path that is always executed,
// i.e. post-dominators of the entry block, we don't have to dynamically
// check whether any lanes are active, and can elide the jump.
If(AnyTrue(activeLaneMask()))
{
EmitImageSampleUnconditional(out, instruction);
}
for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }
return EmitResult::Continue;
}
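// Performs the sampling operation without checking whether any lanes are active.
// When the image is decorated NonUniform, each lane may reference a different
// descriptor, so the sampler routine is looked up and invoked once per active
// lane; otherwise a single uniform descriptor serves all lanes.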
void EmitState::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction) const
{
auto decorations = shader.GetDecorationsForId(instruction.imageId);
if(decorations.NonUniform)
{
SIMD::Int activeLaneMask = this->activeLaneMask();
SIMD::Pointer imagePointer = getImage(instruction.imageId);
// Per-lane output
for(int laneIdx = 0; laneIdx < SIMD::Width; laneIdx++)
{
Array<SIMD::Float> laneOut(out.getArraySize());
If(Extract(activeLaneMask, laneIdx) != 0)
{
Pointer<Byte> imageDescriptor = imagePointer.getPointerForLane(laneIdx); // vk::SampledImageDescriptor*
Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction, laneIdx);
Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
callSamplerFunction(samplerFunction, laneOut, imageDescriptor, instruction);
}
for(int outIdx = 0; outIdx < out.getArraySize(); outIdx++)
{
out[outIdx] = Insert(out[outIdx], Extract(laneOut[outIdx], laneIdx), laneIdx);
}
}
}
else
{
Pointer<Byte> imageDescriptor = getImage(instruction.imageId).getUniformPointer(); // vk::SampledImageDescriptor*
Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction);
Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
callSamplerFunction(samplerFunction, out, imageDescriptor, instruction);
}
}
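// Returns the descriptor which holds the sampler state. For combined
// image/samplers and samplerless instructions this is the image descriptor itself.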
Pointer<Byte> EmitState::getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const
{
return ((instruction.samplerId == instruction.imageId) || (instruction.samplerId == 0)) ? imageDescriptor : getImage(instruction.samplerId).getUniformPointer();
}
Pointer<Byte> EmitState::getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, int laneIdx) const
{
return ((instruction.samplerId == instruction.imageId) || (instruction.samplerId == 0)) ? imageDescriptor : getImage(instruction.samplerId).getPointerForLane(laneIdx);
}
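// Looks up the specialized sampling routine for this instruction's image view and
// sampler. The routine pointer is cached per instruction position, keyed on the
// image descriptor and sampler ID; getImageSampler is called to build or retrieve
// the routine on a cache miss.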
Pointer<Byte> EmitState::lookupSamplerFunction(Pointer<Byte> imageDescriptor, Pointer<Byte> samplerDescriptor, const ImageInstruction &instruction) const
{
Int samplerId = (instruction.samplerId != 0) ? *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId)) : Int(0);
auto &cache = routine->samplerCache.at(instruction.position);
Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId); // TODO(b/205566405): Skip sampler ID check for samplerless instructions.
If(!cacheHit)
{
rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
cache.function = Call(getImageSampler, routine->device, instruction.signature, samplerId, imageViewId);
cache.imageDescriptor = imageDescriptor;
cache.samplerId = samplerId;
}
return cache.function;
}
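// Marshals the sampling inputs (coordinates, then any Dref value, lod/bias or
// gradients, offsets, and sample index) into a flat array of SIMD components and
// invokes the sampling routine.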
void EmitState::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const
{
Array<SIMD::Float> in(16); // Maximum 16 input parameter components.
auto coordinate = Operand(shader, *this, instruction.coordinateId);
uint32_t i = 0;
for(; i < instruction.coordinates; i++)
{
if(instruction.isProj())
{
in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates); // TODO(b/129523279): Optimize using reciprocal.
}
else
{
in[i] = coordinate.Float(i);
}
}
if(instruction.isDref())
{
auto drefValue = Operand(shader, *this, instruction.drefId);
if(instruction.isProj())
{
in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates); // TODO(b/129523279): Optimize using reciprocal.
}
else
{
in[i] = drefValue.Float(0);
}
i++;
}
if(instruction.lodOrBiasId != 0)
{
auto lodValue = Operand(shader, *this, instruction.lodOrBiasId);
in[i] = lodValue.Float(0);
i++;
}
else if(instruction.gradDxId != 0)
{
auto dxValue = Operand(shader, *this, instruction.gradDxId);
auto dyValue = Operand(shader, *this, instruction.gradDyId);
ASSERT(dxValue.componentCount == dyValue.componentCount);
for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
{
in[i] = dxValue.Float(j);
}
for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
{
in[i] = dyValue.Float(j);
}
}
else if(instruction.samplerMethod == Fetch)
{
// The instruction didn't provide a lod operand, but the sampler's Fetch
// function requires one to be present. Default to a lod of zero.
in[i] = As<SIMD::Float>(SIMD::Int(0));
i++;
}
if(instruction.offsetId != 0)
{
auto offsetValue = Operand(shader, *this, instruction.offsetId);
for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
{
in[i] = As<SIMD::Float>(offsetValue.Int(j)); // Integer values, but transferred as float.
}
}
if(instruction.sample)
{
auto sampleValue = Operand(shader, *this, instruction.sampleId);
in[i] = As<SIMD::Float>(sampleValue.Int(0));
}
Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
Call<ImageSampler>(samplerFunction, texture, &in, &out, routine->constants);
}
EmitState::EmitResult EmitState::EmitImageQuerySizeLod(InsnIterator insn)
{
auto &resultTy = shader.getType(insn.resultTypeId());
auto imageId = Object::ID(insn.word(3));
auto lodId = Object::ID(insn.word(4));
auto &dst = createIntermediate(insn.resultId(), resultTy.componentCount);
GetImageDimensions(resultTy, imageId, lodId, dst);
return EmitResult::Continue;
}
EmitState::EmitResult EmitState::EmitImageQuerySize(InsnIterator insn)
{
auto &resultTy = shader.getType(insn.resultTypeId());
auto imageId = Object::ID(insn.word(3));
auto lodId = Object::ID(0);
auto &dst = createIntermediate(insn.resultId(), resultTy.componentCount);
GetImageDimensions(resultTy, imageId, lodId, dst);
return EmitResult::Continue;
}
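// Writes the image's dimensions at the queried mip level into dst. Each dimension
// is halved per level, clamped to a minimum of one texel. For arrayed images the
// layer count occupies the last component and is not scaled by the lod.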
void EmitState::GetImageDimensions(const Type &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
auto &image = shader.getObject(imageId);
auto &imageType = shader.getType(image);
ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
bool isArrayed = imageType.definition.word(5) != 0;
uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);
const SpirvShader::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
Int width;
Int height;
Int depth;
switch(descriptorType)
{
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
break;
default:
UNREACHABLE("Image descriptorType: %d", int(descriptorType));
}
if(lodId != 0)
{
auto lodVal = Operand(shader, *this, lodId);
ASSERT(lodVal.componentCount == 1);
auto lod = lodVal.Int(0);
auto one = SIMD::Int(1);
if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
}
else
{
if(dimensions >= 1) dst.move(0, SIMD::Int(width));
if(dimensions >= 2) dst.move(1, SIMD::Int(height));
if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
}
if(isArrayed)
{
dst.move(dimensions, SIMD::Int(depth));
}
}
EmitState::EmitResult EmitState::EmitImageQueryLevels(InsnIterator insn)
{
auto &resultTy = shader.getType(insn.resultTypeId());
ASSERT(resultTy.componentCount == 1);
auto imageId = Object::ID(insn.word(3));
const SpirvShader::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
Int mipLevels = 0;
switch(descriptorType)
{
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
break;
default:
UNREACHABLE("Image descriptorType: %d", int(descriptorType));
}
auto &dst = createIntermediate(insn.resultId(), 1);
dst.move(0, SIMD::Int(mipLevels));
return EmitResult::Continue;
}
EmitState::EmitResult EmitState::EmitImageQuerySamples(InsnIterator insn)
{
auto &resultTy = shader.getType(insn.resultTypeId());
ASSERT(resultTy.componentCount == 1);
auto imageId = Object::ID(insn.word(3));
auto imageTy = shader.getObjectType(imageId);
ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
ASSERT(imageTy.definition.word(3) == spv::Dim2D);
ASSERT(imageTy.definition.word(6 /* MS */) == 1);
const SpirvShader::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
Int sampleCount = 0;
switch(descriptorType)
{
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
break;
default:
UNREACHABLE("Image descriptorType: %d", int(descriptorType));
}
auto &dst = createIntermediate(insn.resultId(), 1);
dst.move(0, SIMD::Int(sampleCount));
return EmitResult::Continue;
}
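// Computes the byte offset of each lane's texel from its integer coordinates:
//   ptrOffset = u * texelSize + v * rowPitch + w * slicePitch + sample * samplePitch
// where w combines the depth coordinate and the array layer. Subpass data accesses
// are relative to the current window-space position and select the layer of the
// current multiview view.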
EmitState::TexelAddressData EmitState::setupTexelAddressData(SIMD::Int rowPitch, SIMD::Int slicePitch, SIMD::Int samplePitch, ImageInstructionSignature instruction, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, const SpirvRoutine *routine)
{
TexelAddressData data;
data.isArrayed = instruction.arrayed;
data.dim = static_cast<spv::Dim>(instruction.dim);
data.texelSize = imageFormat.bytes();
data.dims = instruction.coordinates - (data.isArrayed ? 1 : 0);
data.u = coordinate[0];
data.v = SIMD::Int(0);
if(data.dims > 1)
{
data.v = coordinate[1];
}
if(data.dim == spv::DimSubpassData)
{
data.u += routine->windowSpacePosition[0];
data.v += routine->windowSpacePosition[1];
}
data.ptrOffset = data.u * SIMD::Int(data.texelSize);
if(data.dims > 1)
{
data.ptrOffset += data.v * rowPitch;
}
data.w = 0;
if((data.dims > 2) || data.isArrayed)
{
if(data.dims > 2)
{
data.w += coordinate[2];
}
if(data.isArrayed)
{
data.w += coordinate[data.dims];
}
data.ptrOffset += data.w * slicePitch;
}
if(data.dim == spv::DimSubpassData)
{
// Multiview input attachment access is to the layer corresponding to the current view.
data.ptrOffset += SIMD::Int(routine->layer) * slicePitch;
}
if(instruction.sample)
{
data.ptrOffset += sample * samplePitch;
}
return data;
}
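// Computes per-lane texel addresses when each lane may reference a distinct
// descriptor. Under Nullify out-of-bounds behavior each coordinate is range-checked
// individually, and failing lanes are redirected to an offset guaranteed to be
// out-of-bounds so that the access gets nullified.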
SIMD::Pointer EmitState::GetNonUniformTexelAddress(ImageInstructionSignature instruction, SIMD::Pointer descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, SIMD::Int activeLaneMask, const SpirvRoutine *routine)
{
const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
auto rowPitch = (descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
: OFFSET(vk::StorageImageDescriptor, rowPitchBytes)))
.Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
auto slicePitch = (descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
: OFFSET(vk::StorageImageDescriptor, slicePitchBytes)))
.Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
auto samplePitch = (descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
: OFFSET(vk::StorageImageDescriptor, samplePitchBytes)))
.Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
auto texelData = setupTexelAddressData(rowPitch, slicePitch, samplePitch, instruction, coordinate, sample, imageFormat, routine);
// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
{
SIMD::UInt width = (descriptor + OFFSET(vk::StorageImageDescriptor, width)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.u), width));
if(texelData.dims > 1)
{
SIMD::UInt height = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, height)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.v), height));
}
if((texelData.dims > 2) || texelData.isArrayed)
{
SIMD::UInt depth = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, depth)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
if(texelData.dim == spv::DimCube) { depth *= 6; }
oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.w), depth));
}
if(instruction.sample)
{
SIMD::UInt sampleCount = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
}
constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16; // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");
texelData.ptrOffset = (texelData.ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET)); // oob ? OOB_OFFSET : ptrOffset // TODO: IfThenElse()
}
std::vector<Pointer<Byte>> imageBase(SIMD::Width);
for(int i = 0; i < SIMD::Width; i++)
{
imageBase[i] = *Pointer<Pointer<Byte>>(descriptor.getPointerForLane(i) + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilPtr)
: OFFSET(vk::StorageImageDescriptor, ptr)));
}
return SIMD::Pointer(imageBase) + texelData.ptrOffset;
}
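// Computes texel addresses against a single descriptor that is uniform across all
// lanes. The same coordinate validation as in GetNonUniformTexelAddress applies.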
SIMD::Pointer EmitState::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const SpirvRoutine *routine)
{
const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
: OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
auto slicePitch = SIMD::Int(
*Pointer<Int>(descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
: OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
auto samplePitch = SIMD::Int(
*Pointer<Int>(descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
: OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
auto texelData = setupTexelAddressData(rowPitch, slicePitch, samplePitch, instruction, coordinate, sample, imageFormat, routine);
// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
{
SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.u), width));
if(texelData.dims > 1)
{
SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.v), height));
}
if((texelData.dims > 2) || texelData.isArrayed)
{
UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
if(texelData.dim == spv::DimCube) { depth *= 6; }
oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.w), SIMD::UInt(depth)));
}
if(instruction.sample)
{
SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
}
constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16; // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");
texelData.ptrOffset = (texelData.ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET)); // oob ? OOB_OFFSET : ptrOffset // TODO: IfThenElse()
}
Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
? OFFSET(vk::StorageImageDescriptor, stencilPtr)
: OFFSET(vk::StorageImageDescriptor, ptr)));
Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
return SIMD::Pointer(imageBase, imageSizeInBytes, texelData.ptrOffset);
}
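// Emits an OpImageRead or input attachment load: computes the texel addresses,
// gathers the packed texel data, and unpacks it into the result components
// according to the image format.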
EmitState::EmitResult EmitState::EmitImageRead(const ImageInstruction &instruction)
{
auto &resultType = shader.getObjectType(instruction.resultId);
auto &image = shader.getObject(instruction.imageId);
auto &imageType = shader.getType(image);
ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
auto dim = static_cast<spv::Dim>(instruction.dim);
auto coordinate = Operand(shader, *this, instruction.coordinateId);
const SpirvShader::DescriptorDecorations &d = shader.descriptorDecorations.at(instruction.imageId);
// For subpass data, the format in the instruction is spv::ImageFormatUnknown. Get it
// from the renderpass data instead. In all other cases, we can use the format in the instruction.
vk::Format imageFormat = (dim == spv::DimSubpassData)
? shader.inputAttachmentFormats[d.InputAttachmentIndex]
: SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
// Depth+Stencil image attachments select aspect based on the Sampled Type of the
// OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
shader.getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
if(useStencilAspect)
{
imageFormat = VK_FORMAT_S8_UINT;
}
auto &dst = createIntermediate(instruction.resultId, resultType.componentCount);
SIMD::Pointer ptr = getPointer(instruction.imageId);
SIMD::Int uvwa[4];
SIMD::Int sample;
const int texelSize = imageFormat.bytes();
// VK_EXT_image_robustness requires replacing out-of-bounds access with zero.
// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
auto robustness = OutOfBoundsBehavior::Nullify;
for(uint32_t i = 0; i < instruction.coordinates; i++)
{
uvwa[i] = coordinate.Int(i);
}
if(instruction.sample)
{
sample = Operand(shader, *this, instruction.sampleId).Int(0);
}
// Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
// TODO(b/160531165): Provide gather abstractions for various element sizes.
SIMD::Int packed[4];
SIMD::Pointer texelPtr = ptr.isBasePlusOffset
? GetTexelAddress(instruction, ptr.getUniformPointer(), uvwa, sample, imageFormat, robustness, routine)
: GetNonUniformTexelAddress(instruction, ptr, uvwa, sample, imageFormat, robustness, activeLaneMask(), routine);
if(texelSize == 4 || texelSize == 8 || texelSize == 16)
{
for(auto i = 0; i < texelSize / 4; i++)
{
packed[i] = texelPtr.Load<SIMD::Int>(robustness, activeLaneMask());
texelPtr += sizeof(float);
}
}
else if(texelSize == 2)
{
SIMD::Int mask = activeLaneMask() & texelPtr.isInBounds(2, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.getPointerForLane(i))), i);
}
}
}
else if(texelSize == 1)
{
SIMD::Int mask = activeLaneMask() & texelPtr.isInBounds(1, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.getPointerForLane(i))), i);
}
}
}
else
UNREACHABLE("texelSize: %d", int(texelSize));
// Format support requirements here come from two sources:
// - Minimum required set of formats for loads from storage images
// - Any format supported as a color or depth/stencil attachment, for input attachments
switch(imageFormat)
{
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
dst.move(0, packed[0]);
dst.move(1, packed[1]);
dst.move(2, packed[2]);
dst.move(3, packed[3]);
break;
case VK_FORMAT_R32_SINT:
case VK_FORMAT_R32_UINT:
dst.move(0, packed[0]);
// Fill remaining channels with 0,0,1 (of the correct type)
dst.move(1, SIMD::Int(0));
dst.move(2, SIMD::Int(0));
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
dst.move(0, packed[0]);
// Fill remaining channels with 0,0,1 (of the correct type)
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_D16_UNORM:
dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16G16B16A16_UNORM:
dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
break;
case VK_FORMAT_R16G16B16A16_SNORM:
dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
break;
case VK_FORMAT_R16G16B16A16_SINT:
dst.move(0, (packed[0] << 16) >> 16);
dst.move(1, packed[0] >> 16);
dst.move(2, (packed[1] << 16) >> 16);
dst.move(3, packed[1] >> 16);
break;
case VK_FORMAT_R16G16B16A16_UINT:
dst.move(0, packed[0] & SIMD::Int(0xFFFF));
dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
dst.move(2, packed[1] & SIMD::Int(0xFFFF));
dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
break;
case VK_FORMAT_R16G16B16A16_SFLOAT:
dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
break;
case VK_FORMAT_R8G8B8A8_SNORM:
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
break;
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
break;
case VK_FORMAT_R8G8B8A8_SRGB:
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
dst.move(0, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
dst.move(2, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
break;
case VK_FORMAT_B8G8R8A8_UNORM:
dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
break;
case VK_FORMAT_B8G8R8A8_SRGB:
dst.move(0, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
dst.move(2, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
break;
case VK_FORMAT_R8G8B8A8_UINT:
case VK_FORMAT_A8B8G8R8_UINT_PACK32:
dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
break;
case VK_FORMAT_R8G8B8A8_SINT:
case VK_FORMAT_A8B8G8R8_SINT_PACK32:
dst.move(0, (packed[0] << 24) >> 24);
dst.move(1, (packed[0] << 16) >> 24);
dst.move(2, (packed[0] << 8) >> 24);
dst.move(3, packed[0] >> 24);
break;
case VK_FORMAT_R8_UNORM:
dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R8_SNORM:
dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R8_UINT:
case VK_FORMAT_S8_UINT:
dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
dst.move(1, SIMD::UInt(0));
dst.move(2, SIMD::UInt(0));
dst.move(3, SIMD::UInt(1));
break;
case VK_FORMAT_R8_SINT:
dst.move(0, (packed[0] << 24) >> 24);
dst.move(1, SIMD::Int(0));
dst.move(2, SIMD::Int(0));
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R8G8_UNORM:
dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R8G8_SNORM:
dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R8G8_UINT:
dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
dst.move(2, SIMD::UInt(0));
dst.move(3, SIMD::UInt(1));
break;
case VK_FORMAT_R8G8_SINT:
dst.move(0, (packed[0] << 24) >> 24);
dst.move(1, (packed[0] << 16) >> 24);
dst.move(2, SIMD::Int(0));
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R16_SFLOAT:
dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16_UNORM:
dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16_SNORM:
dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
dst.move(1, SIMD::Float(0.0f));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16_UINT:
dst.move(0, packed[0] & SIMD::Int(0xFFFF));
dst.move(1, SIMD::UInt(0));
dst.move(2, SIMD::UInt(0));
dst.move(3, SIMD::UInt(1));
break;
case VK_FORMAT_R16_SINT:
dst.move(0, (packed[0] << 16) >> 16);
dst.move(1, SIMD::Int(0));
dst.move(2, SIMD::Int(0));
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R16G16_SFLOAT:
dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16G16_UNORM:
dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16G16_SNORM:
dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R16G16_UINT:
dst.move(0, packed[0] & SIMD::Int(0xFFFF));
dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
dst.move(2, SIMD::UInt(0));
dst.move(3, SIMD::UInt(1));
break;
case VK_FORMAT_R16G16_SINT:
dst.move(0, (packed[0] << 16) >> 16);
dst.move(1, packed[0] >> 16);
dst.move(2, SIMD::Int(0));
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT:
dst.move(0, packed[0]);
dst.move(1, packed[1]);
dst.move(2, SIMD::Int(0));
dst.move(3, SIMD::Int(1));
break;
case VK_FORMAT_R32G32_SFLOAT:
dst.move(0, packed[0]);
dst.move(1, packed[1]);
dst.move(2, SIMD::Float(0.0f));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_A2B10G10R10_UINT_PACK32:
dst.move(0, packed[0] & SIMD::Int(0x3FF));
dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
break;
case VK_FORMAT_A2R10G10B10_UINT_PACK32:
dst.move(2, packed[0] & SIMD::Int(0x3FF));
dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
break;
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
break;
case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
break;
case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
break;
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
break;
case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
break;
case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
break;
case VK_FORMAT_R5G6B5_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_B5G6R5_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(3, SIMD::Float(1.0f));
break;
case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
break;
case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
break;
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
dst.move(3, SIMD::Float(1.0f));
break;
default:
UNSUPPORTED("VkFormat %d", int(imageFormat));
break;
}
return EmitResult::Continue;
}
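// Emits an OpImageWrite. Texels in a known format are packed and stored directly;
// writes to images declared with an Unknown format (VK_FORMAT_UNDEFINED) go through
// a specialized sampler write routine instead.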
EmitState::EmitResult EmitState::EmitImageWrite(const ImageInstruction &instruction)
{
auto &image = shader.getObject(instruction.imageId);
auto &imageType = shader.getType(image);
ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData); // "Its Dim operand must not be SubpassData."
auto coordinate = Operand(shader, *this, instruction.coordinateId);
auto texel = Operand(shader, *this, instruction.texelId);
Array<SIMD::Int> coord(5); // uvwa & sample
uint32_t i = 0;
for(; i < instruction.coordinates; i++)
{
coord[i] = coordinate.Int(i);
}
if(instruction.sample)
{
coord[i] = Operand(shader, *this, instruction.sampleId).Int(0);
}
Array<SIMD::Int> texelAndMask(5);
texelAndMask[0] = texel.Int(0);
texelAndMask[1] = texel.Int(1);
texelAndMask[2] = texel.Int(2);
texelAndMask[3] = texel.Int(3);
texelAndMask[4] = activeStoresAndAtomicsMask();
vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
SIMD::Pointer ptr = getPointer(instruction.imageId);
if(ptr.isBasePlusOffset)
{
Pointer<Byte> imageDescriptor = ptr.getUniformPointer(); // vk::StorageImageDescriptor* or vk::SampledImageDescriptor*
Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction);
if(imageFormat == VK_FORMAT_UNDEFINED) // spv::ImageFormatUnknown
{
Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
Call<ImageSampler>(samplerFunction, imageDescriptor, &coord, &texelAndMask, routine->constants);
}
else
{
WriteImage(instruction, imageDescriptor, &coord, &texelAndMask, imageFormat);
}
}
else
{
for(int j = 0; j < SIMD::Width; j++)
{
SIMD::Int singleLaneMask = 0;
singleLaneMask = Insert(singleLaneMask, 0xffffffff, j);
texelAndMask[4] = activeStoresAndAtomicsMask() & singleLaneMask;
Pointer<Byte> imageDescriptor = ptr.getPointerForLane(j);
Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction, j);
if(imageFormat == VK_FORMAT_UNDEFINED) // spv::ImageFormatUnknown
{
Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
Call<ImageSampler>(samplerFunction, imageDescriptor, &coord, &texelAndMask, routine->constants);
}
else
{
WriteImage(instruction, imageDescriptor, &coord, &texelAndMask, imageFormat);
}
}
}
return EmitResult::Continue;
}
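// Packs the texel components into the target format and scatters them to the
// computed addresses. texelAndMask carries the four texel components followed by
// the active-stores lane mask; lanes with a zero mask leave memory untouched.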
void EmitState::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
{
SIMD::Int texel[4];
texel[0] = texelAndMask[0];
texel[1] = texelAndMask[1];
texel[2] = texelAndMask[2];
texel[3] = texelAndMask[3];
SIMD::Int mask = texelAndMask[4];
SIMD::Int packed[4];
switch(imageFormat)
{
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
packed[0] = texel[0];
packed[1] = texel[1];
packed[2] = texel[2];
packed[3] = texel[3];
break;
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32_SINT:
case VK_FORMAT_R32_UINT:
packed[0] = texel[0];
break;
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
break;
case VK_FORMAT_B8G8R8A8_UNORM:
packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
break;
case VK_FORMAT_B8G8R8A8_SRGB:
packed[0] = (SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[2])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[1])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[0])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
break;
case VK_FORMAT_R8G8B8A8_SNORM:
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
SIMD::Int(0xFF)) |
((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
SIMD::Int(0xFF))
<< 8) |
((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
SIMD::Int(0xFF))
<< 16) |
((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
SIMD::Int(0xFF))
<< 24);
break;
case VK_FORMAT_R8G8B8A8_SINT:
case VK_FORMAT_R8G8B8A8_UINT:
case VK_FORMAT_A8B8G8R8_SINT_PACK32:
case VK_FORMAT_A8B8G8R8_UINT_PACK32:
packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xff))) |
(SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xff)) << 8) |
(SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xff)) << 16) |
(SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xff)) << 24);
break;
case VK_FORMAT_R16G16B16A16_SFLOAT:
packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
break;
case VK_FORMAT_R16G16B16A16_SINT:
case VK_FORMAT_R16G16B16A16_UINT:
packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
break;
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT:
packed[0] = texel[0];
packed[1] = texel[1];
break;
case VK_FORMAT_R16G16_SFLOAT:
packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
break;
case VK_FORMAT_R16G16_SINT:
case VK_FORMAT_R16G16_UINT:
packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
break;
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
// Truncates instead of rounding. See b/147900455
packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
break;
case VK_FORMAT_R16_SFLOAT:
packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
break;
case VK_FORMAT_R16G16B16A16_UNORM:
packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
(SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
(SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
break;
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
break;
case VK_FORMAT_R16G16_UNORM:
packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
(SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
break;
case VK_FORMAT_R8G8_UNORM:
packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
(SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
break;
case VK_FORMAT_R16_UNORM:
packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
break;
case VK_FORMAT_R8_UNORM:
packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
break;
case VK_FORMAT_R16G16B16A16_SNORM:
packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
(SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
(SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
break;
case VK_FORMAT_R16G16_SNORM:
packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
(SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
break;
case VK_FORMAT_R8G8_SNORM:
packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
(SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
break;
case VK_FORMAT_R16_SNORM:
packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
break;
case VK_FORMAT_R8_SNORM:
packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
break;
case VK_FORMAT_R8G8_SINT:
case VK_FORMAT_R8G8_UINT:
packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
break;
case VK_FORMAT_R16_SINT:
case VK_FORMAT_R16_UINT:
packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
break;
case VK_FORMAT_R8_SINT:
case VK_FORMAT_R8_UINT:
packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
break;
case VK_FORMAT_A2B10G10R10_UINT_PACK32:
packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
(SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
(SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
(SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
break;
default:
UNSUPPORTED("VkFormat %d", int(imageFormat));
break;
}
// "The integer texel coordinates are validated according to the same rules as for texel input coordinate
// validation. If the texel fails integer texel coordinate validation, then the write has no effect."
// - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
auto robustness = OutOfBoundsBehavior::Nullify;
// GetTexelAddress() only needs the SpirvRoutine* for SubpassData accesses (i.e. input attachments).
const SpirvRoutine *routine = nullptr;
SIMD::Int uvwa[4];
SIMD::Int sample;
uint32_t i = 0;
for(; i < instruction.coordinates; i++)
{
uvwa[i] = As<SIMD::Int>(coord[i]);
}
if(instruction.sample)
{
sample = As<SIMD::Int>(coord[i]);
}
auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, routine);
const int texelSize = imageFormat.bytes();
// Scatter packed texel data.
// TODO(b/160531165): Provide scatter abstractions for various element sizes.
if(texelSize == 4 || texelSize == 8 || texelSize == 16)
{
for(auto i = 0; i < texelSize / 4; i++)
{
texelPtr.Store(packed[i], robustness, mask);
texelPtr += sizeof(float);
}
}
else if(texelSize == 2)
{
mask = mask & texelPtr.isInBounds(2, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
*Pointer<Short>(texelPtr.getPointerForLane(i)) = Short(Extract(packed[0], i));
}
}
}
else if(texelSize == 1)
{
mask = mask & texelPtr.isInBounds(1, robustness);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(mask, i) != 0)
{
*Pointer<Byte>(texelPtr.getPointerForLane(i)) = Byte(Extract(packed[0], i));
}
}
}
else
UNREACHABLE("texelSize: %d", int(texelSize));
}
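// Emits OpImageTexelPointer: computes the address of the selected texel and
// exposes it as a pointer, for use by subsequent atomic operations.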
EmitState::EmitResult EmitState::EmitImageTexelPointer(const ImageInstruction &instruction)
{
auto coordinate = Operand(shader, *this, instruction.coordinateId);
SIMD::Pointer ptr = getPointer(instruction.imageId);
// VK_EXT_image_robustness requires checking for out-of-bounds accesses.
// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
auto robustness = OutOfBoundsBehavior::Nullify;
vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
SIMD::Int uvwa[4];
for(uint32_t i = 0; i < instruction.coordinates; i++)
{
uvwa[i] = coordinate.Int(i);
}
SIMD::Int sample = Operand(shader, *this, instruction.sampleId).Int(0);
auto texelPtr = ptr.isBasePlusOffset
? GetTexelAddress(instruction, ptr.getUniformPointer(), uvwa, sample, imageFormat, robustness, routine)
: GetNonUniformTexelAddress(instruction, ptr, uvwa, sample, imageFormat, robustness, activeLaneMask(), routine);
createPointer(instruction.resultId, texelPtr);
return EmitResult::Continue;
}
EmitState::EmitResult EmitState::EmitSampledImage(InsnIterator insn)
{
Object::ID resultId = insn.word(2);
Object::ID imageId = insn.word(3);
Object::ID samplerId = insn.word(4);
// Create a sampled image, containing both a sampler and an image.
createSampledImage(resultId, { getPointer(imageId), samplerId });
return EmitResult::Continue;
}
EmitState::EmitResult EmitState::EmitImage(InsnIterator insn)
{
Object::ID resultId = insn.word(2);
Object::ID imageId = insn.word(3);
// Extract the image from a sampled image.
createPointer(resultId, getImage(imageId));
return EmitResult::Continue;
}
} // namespace sw