src/Pipeline/SpirvShaderControlFlow.cpp - SwiftShader - Git at Google

 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "SpirvShader.hpp"

 #include "Reactor/Coroutine.hpp" // rr::Yield

 #include "ShaderCore.hpp"

 #include <spirv/unified1/spirv.hpp>

 #include <queue>

 namespace sw {

 SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
 {
 	// Default to a Simple, this may change later.
 	kind = Block::Simple;

 	// Walk the instructions to find the last two of the block.
 	InsnIterator insns[2];
 	for (auto insn : *this)
 	{
 		insns[0] = insns[1];
 		insns[1] = insn;
 	}

 	switch (insns[1].opcode())
 	{
 		case spv::OpBranch:
 			branchInstruction = insns[1];
 			outs.emplace(Block::ID(branchInstruction.word(1)));

 			switch (insns[0].opcode())
 			{
 				case spv::OpLoopMerge:
 					kind = Loop;
 					mergeInstruction = insns[0];
 					mergeBlock = Block::ID(mergeInstruction.word(1));
 					continueTarget = Block::ID(mergeInstruction.word(2));
 					break;

 				default:
 					kind = Block::Simple;
 					break;
 			}
 			break;

 		case spv::OpBranchConditional:
 			branchInstruction = insns[1];
 			outs.emplace(Block::ID(branchInstruction.word(2)));
 			outs.emplace(Block::ID(branchInstruction.word(3)));

 			switch (insns[0].opcode())
 			{
 				case spv::OpSelectionMerge:
 					kind = StructuredBranchConditional;
 					mergeInstruction = insns[0];
 					mergeBlock = Block::ID(mergeInstruction.word(1));
 					break;

 				case spv::OpLoopMerge:
 					kind = Loop;
 					mergeInstruction = insns[0];
 					mergeBlock = Block::ID(mergeInstruction.word(1));
 					continueTarget = Block::ID(mergeInstruction.word(2));
 					break;

 				default:
 					kind = UnstructuredBranchConditional;
 					break;
 			}
 			break;

 		case spv::OpSwitch:
 			branchInstruction = insns[1];
 			outs.emplace(Block::ID(branchInstruction.word(2)));
 			for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
 			{
 				outs.emplace(Block::ID(branchInstruction.word(w)));
 			}

 			switch (insns[0].opcode())
 			{
 				case spv::OpSelectionMerge:
 					kind = StructuredSwitch;
 					mergeInstruction = insns[0];
 					mergeBlock = Block::ID(mergeInstruction.word(1));
 					break;

 				default:
 					kind = UnstructuredSwitch;
 					break;
 			}
 			break;

 		default:
 			break;
 	}
 }

 void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
 {
 	if (reachable.count(id) == 0)
 	{
 		reachable.emplace(id);
 		for (auto out : getBlock(id).outs)
 		{
 			TraverseReachableBlocks(out, reachable);
 		}
 	}
 }

 void SpirvShader::Function::AssignBlockFields()
 {
 	Block::Set reachable;
 	TraverseReachableBlocks(entry, reachable);

 	for (auto &it : blocks)
 	{
 		auto &blockId = it.first;
 		auto &block = it.second;
 		if (reachable.count(blockId) > 0)
 		{
 			for (auto &outId : it.second.outs)
 			{
 				auto outIt = blocks.find(outId);
 				ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
 				auto &out = outIt->second;
 				out.ins.emplace(blockId);
 			}
 			if (block.kind == Block::Loop)
 			{
 				auto mergeIt = blocks.find(block.mergeBlock);
 				ASSERT_MSG(mergeIt != blocks.end(), "Loop block %d has a non-existent merge block %d", blockId.value(), block.mergeBlock.value());
 				mergeIt->second.isLoopMerge = true;
 			}
 		}
 	}
 }

 void SpirvShader::Function::ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const
 {
 	auto block = getBlock(blockId);
 	for (auto dep : block.ins)
 	{
 		if (block.kind != Block::Loop ||                 // if not a loop...
 			!ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
 		{
 			f(dep);
 		}
 	}
 }

 bool SpirvShader::Function::ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
 {
 	// TODO: Optimize: This can be cached on the block.
 	Block::Set seen;
 	seen.emplace(notPassingThrough);

 	std::queue<Block::ID> pending;
 	pending.emplace(from);

 	while (pending.size() > 0)
 	{
 		auto id = pending.front();
 		pending.pop();
 		for (auto out : getBlock(id).outs)
 		{
 			if (seen.count(out) != 0) { continue; }
 			if (out == to) { return true; }
 			pending.emplace(out);
 		}
 		seen.emplace(id);
 	}

 	return false;
 }

 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
 {
 	addActiveLaneMaskEdge(block, to, mask & activeLaneMask());
 }

 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
 {
 	auto edge = Block::Edge{from, to};
 	auto it = edgeActiveLaneMasks.find(edge);
 	if (it == edgeActiveLaneMasks.end())
 	{
 		edgeActiveLaneMasks.emplace(edge, mask);
 	}
 	else
 	{
 		auto combined = it->second | mask;
 		edgeActiveLaneMasks.erase(edge);
 		edgeActiveLaneMasks.emplace(edge, combined);
 	}
 }

 RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
 {
 	auto edge = Block::Edge{from, to};
 	auto it = state->edgeActiveLaneMasks.find(edge);
 	ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
 	return it->second;
 }

 void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
 {
 	auto oldPending = state->pending;
 	auto &function = getFunction(state->function);

 	std::deque<Block::ID> pending;
 	state->pending = &pending;
 	pending.push_front(id);
 	while (pending.size() > 0)
 	{
 		auto id = pending.front();

 		auto const &block = function.getBlock(id);
 		if (id == ignore)
 		{
 			pending.pop_front();
 			continue;
 		}

 		// Ensure all dependency blocks have been generated.
 		auto depsDone = true;
 		function.ForeachBlockDependency(id, [&](Block::ID dep)
 		{
 			if (state->visited.count(dep) == 0)
 			{
 				state->pending->push_front(dep);
 				depsDone = false;
 			}
 		});

 		if (!depsDone)
 		{
 			continue;
 		}

 		pending.pop_front();

 		state->block = id;

 		switch (block.kind)
 		{
 			case Block::Simple:
 			case Block::StructuredBranchConditional:
 			case Block::UnstructuredBranchConditional:
 			case Block::StructuredSwitch:
 			case Block::UnstructuredSwitch:
 				EmitNonLoop(state);
 				break;

 			case Block::Loop:
 				EmitLoop(state);
 				break;

 			default:
 				UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
 		}
 	}

 	state->pending = oldPending;
 }

 void SpirvShader::EmitNonLoop(EmitState *state) const
 {
 	auto &function = getFunction(state->function);
 	auto blockId = state->block;
 	auto block = function.getBlock(blockId);

 	if (!state->visited.emplace(blockId).second)
 	{
 		return; // Already generated this block.
 	}

 	if (blockId != function.entry)
 	{
 		// Set the activeLaneMask.
 		SIMD::Int activeLaneMask(0);
 		for (auto in : block.ins)
 		{
 			auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
 			activeLaneMask |= inMask;
 		}
 		state->setActiveLaneMask(activeLaneMask);
 	}

 	EmitInstructions(block.begin(), block.end(), state);

 	for (auto out : block.outs)
 	{
 		if (state->visited.count(out) == 0)
 		{
 			state->pending->push_back(out);
 		}
 	}
 }

 void SpirvShader::EmitLoop(EmitState *state) const
 {
 	auto &function = getFunction(state->function);
 	auto blockId = state->block;
 	auto &block = function.getBlock(blockId);
 	auto mergeBlockId = block.mergeBlock;
 	auto &mergeBlock = function.getBlock(mergeBlockId);

 	if (!state->visited.emplace(blockId).second)
 	{
 		return; // Already emitted this loop.
 	}

 	// Gather all the blocks that make up the loop.
 	std::unordered_set<Block::ID> loopBlocks;
 	loopBlocks.emplace(block.mergeBlock);
 	function.TraverseReachableBlocks(blockId, loopBlocks);

 	// incomingBlocks are block ins that are not back-edges.
 	std::unordered_set<Block::ID> incomingBlocks;
 	for (auto in : block.ins)
 	{
 		if (loopBlocks.count(in) == 0)
 		{
 			incomingBlocks.emplace(in);
 		}
 	}

 	// Emit the loop phi instructions, and initialize them with a value from
 	// the incoming blocks.
 	for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
 	{
 		if (insn.opcode() == spv::OpPhi)
 		{
 			StorePhi(blockId, insn, state, incomingBlocks);
 		}
 	}

 	// loopActiveLaneMask is the mask of lanes that are continuing to loop.
 	// This is initialized with the incoming active lane masks.
 	SIMD::Int loopActiveLaneMask = SIMD::Int(0);
 	for (auto in : incomingBlocks)
 	{
 		loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
 	}

 	// mergeActiveLaneMasks contains edge lane masks for the merge block.
 	// This is the union of all edge masks across all iterations of the loop.
 	std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
 	for (auto in : function.getBlock(mergeBlockId).ins)
 	{
 		mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
 	}

 	// Create the loop basic blocks
 	auto headerBasicBlock = Nucleus::createBasicBlock();
 	auto mergeBasicBlock = Nucleus::createBasicBlock();

 	// Start emitting code inside the loop.
 	Nucleus::createBr(headerBasicBlock);
 	Nucleus::setInsertBlock(headerBasicBlock);

 	// Load the active lane mask.
 	state->setActiveLaneMask(loopActiveLaneMask);

 	// Emit the non-phi loop header block's instructions.
 	for (auto insn = block.begin(); insn != block.end(); insn++)
 	{
 		if (insn.opcode() == spv::OpPhi)
 		{
 			LoadPhi(insn, state);
 		}
 		else
 		{
 			EmitInstruction(insn, state);
 		}
 	}

 	// Emit all blocks between the loop header and the merge block, but
 	// don't emit the merge block yet.
 	for (auto out : block.outs)
 	{
 		EmitBlocks(out, state, mergeBlockId);
 	}

 	// Restore current block id after emitting loop blocks.
 	state->block = blockId;

 	// Rebuild the loopActiveLaneMask from the loop back edges.
 	loopActiveLaneMask = SIMD::Int(0);
 	for (auto in : block.ins)
 	{
 		if (function.ExistsPath(blockId, in, mergeBlockId))
 		{
 			loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
 		}
 	}

 	// Add active lanes to the merge lane mask.
 	for (auto in : function.getBlock(mergeBlockId).ins)
 	{
 		auto edge = Block::Edge{in, mergeBlockId};
 		auto it = state->edgeActiveLaneMasks.find(edge);
 		if (it != state->edgeActiveLaneMasks.end())
 		{
 			mergeActiveLaneMasks[in] |= it->second;
 		}
 	}

 	// Update loop phi values.
 	for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
 	{
 		if (insn.opcode() == spv::OpPhi)
 		{
 			StorePhi(blockId, insn, state, loopBlocks);
 		}
 	}

 	// Use the [loop -> merge] active lane masks to update the phi values in
 	// the merge block. We need to do this to handle divergent control flow
 	// in the loop.
 	//
 	// Consider the following:
 	//
 	//     int phi_source = 0;
 	//     for (uint i = 0; i < 4; i++)
 	//     {
 	//         phi_source = 0;
 	//         if (gl_GlobalInvocationID.x % 4 == i) // divergent control flow
 	//         {
 	//             phi_source = 42; // single lane assignment.
 	//             break; // activeLaneMask for [loop->merge] is active for a single lane.
 	//         }
 	//         // -- we are here --
 	//     }
 	//     // merge block
 	//     int phi = phi_source; // OpPhi
 	//
 	// In this example, with each iteration of the loop, phi_source will
 	// only have a single lane assigned. However by 'phi' value in the merge
 	// block needs to be assigned the union of all the per-lane assignments
 	// of phi_source when that lane exited the loop.
 	for (auto insn = mergeBlock.begin(); insn != mergeBlock.end(); insn++)
 	{
 		if (insn.opcode() == spv::OpPhi)
 		{
 			StorePhi(mergeBlockId, insn, state, loopBlocks);
 		}
 	}

 	// Loop body now done.
 	// If any lanes are still active, jump back to the loop header,
 	// otherwise jump to the merge block.
 	Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);

 	// Continue emitting from the merge block.
 	Nucleus::setInsertBlock(mergeBasicBlock);
 	state->pending->push_back(mergeBlockId);
 	for (auto it : mergeActiveLaneMasks)
 	{
 		state->addActiveLaneMaskEdge(it.first, mergeBlockId, it.second);
 	}
 }

 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
 {
 	auto target = Block::ID(insn.word(1));
 	state->addActiveLaneMaskEdge(state->block, target, state->activeLaneMask());
 	return EmitResult::Terminator;
 }

 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
 {
 	auto &function = getFunction(state->function);
 	auto block = function.getBlock(state->block);
 	ASSERT(block.branchInstruction == insn);

 	auto condId = Object::ID(block.branchInstruction.word(1));
 	auto trueBlockId = Block::ID(block.branchInstruction.word(2));
 	auto falseBlockId = Block::ID(block.branchInstruction.word(3));

 	auto cond = GenericValue(this, state, condId);
 	ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");

 	// TODO: Optimize for case where all lanes take same path.

 	state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
 	state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));

 	return EmitResult::Terminator;
 }

 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
 {
 	auto &function = getFunction(state->function);
 	auto block = function.getBlock(state->block);
 	ASSERT(block.branchInstruction == insn);

 	auto selId = Object::ID(block.branchInstruction.word(1));

 	auto sel = GenericValue(this, state, selId);
 	ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");

 	auto numCases = (block.branchInstruction.wordCount() - 3) / 2;

 	// TODO: Optimize for case where all lanes take same path.

 	SIMD::Int defaultLaneMask = state->activeLaneMask();

 	// Gather up the case label matches and calculate defaultLaneMask.
 	std::vector<RValue<SIMD::Int>> caseLabelMatches;
 	caseLabelMatches.reserve(numCases);
 	for (uint32_t i = 0; i < numCases; i++)
 	{
 		auto label = block.branchInstruction.word(i * 2 + 3);
 		auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
 		auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
 		state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
 		defaultLaneMask &= ~caseLabelMatch;
 	}

 	auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
 	state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);

 	return EmitResult::Terminator;
 }

 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
 {
 	// TODO: Log something in this case?
 	state->setActiveLaneMask(SIMD::Int(0));
 	return EmitResult::Terminator;
 }

 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
 {
 	state->setActiveLaneMask(SIMD::Int(0));
 	return EmitResult::Terminator;
 }

 SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
 {
 	state->routine->killMask |= SignMask(state->activeLaneMask());
 	state->setActiveLaneMask(SIMD::Int(0));
 	return EmitResult::Terminator;
 }

 SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
 {
 	auto functionId = Function::ID(insn.word(3));
 	const auto& functionIt = functions.find(functionId);
 	ASSERT(functionIt != functions.end());
 	auto& function = functionIt->second;

 	// TODO(b/141246700): Add full support for spv::OpFunctionCall
 	// The only supported function is a single OpKill wrapped in a
 	// function, as a result of the "wrap OpKill" SPIRV-Tools pass
 	ASSERT(function.blocks.size() == 1);
 	spv::Op wrapOpKill[] = { spv::OpLabel, spv::OpKill };

 	for (auto block : function.blocks)
 	{
 		int insnNumber = 0;
 		for (auto blockInsn : block.second)
 		{
 			if (insnNumber > 1)
 			{
 				UNIMPLEMENTED("Function block number of instructions: %d", insnNumber);
 				return EmitResult::Continue;
 			}
 			if (blockInsn.opcode() != wrapOpKill[insnNumber++])
 			{
 				UNIMPLEMENTED("Function block instruction %d : %s", insnNumber - 1, OpcodeName(blockInsn.opcode()).c_str());
 				return EmitResult::Continue;
 			}
 			if (blockInsn.opcode() == spv::OpKill)
 			{
 				EmitInstruction(blockInsn, state);
 			}
 		}
 	}

 	return EmitResult::Continue;
 }

 SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
 {
 	auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
 	auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
 	// TODO: We probably want to consider the memory scope here. For now,
 	// just always emit the full fence.
 	Fence(semantics);

 	switch (executionScope)
 	{
 	case spv::ScopeWorkgroup:
 		Yield(YieldResult::ControlBarrier);
 		break;
 	case spv::ScopeSubgroup:
 		break;
 	default:
 		// See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
 		UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
 		break;
 	}

 	return EmitResult::Continue;
 }

 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
 {
 	auto &function = getFunction(state->function);
 	auto currentBlock = function.getBlock(state->block);
 	if (!currentBlock.isLoopMerge)
 	{
 		// If this is a loop merge block, then don't attempt to update the
 		// phi values from the ins. EmitLoop() has had to take special care
 		// of this phi in order to correctly deal with divergent lanes.
 		StorePhi(state->block, insn, state, currentBlock.ins);
 	}
 	LoadPhi(insn, state);
 	return EmitResult::Continue;
 }

 void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
 {
 	auto typeId = Type::ID(insn.word(1));
 	auto type = getType(typeId);
 	auto objectId = Object::ID(insn.word(2));

 	auto storageIt = state->routine->phis.find(objectId);
 	ASSERT(storageIt != state->routine->phis.end());
 	auto &storage = storageIt->second;

 	auto &dst = state->createIntermediate(objectId, type.sizeInComponents);
 	for(uint32_t i = 0; i < type.sizeInComponents; i++)
 	{
 		dst.move(i, storage[i]);
 	}
 }

 void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
 {
 	auto typeId = Type::ID(insn.word(1));
 	auto type = getType(typeId);
 	auto objectId = Object::ID(insn.word(2));

 	auto storageIt = state->routine->phis.find(objectId);
 	ASSERT(storageIt != state->routine->phis.end());
 	auto &storage = storageIt->second;

 	for (uint32_t w = 3; w < insn.wordCount(); w += 2)
 	{
 		auto varId = Object::ID(insn.word(w + 0));
 		auto blockId = Block::ID(insn.word(w + 1));

 		if (filter.count(blockId) == 0)
 		{
 			continue;
 		}

 		auto mask = GetActiveLaneMaskEdge(state, blockId, currentBlock);
 		auto in = GenericValue(this, state, varId);

 		for (uint32_t i = 0; i < type.sizeInComponents; i++)
 		{
 			storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
 		}
 	}
 }

 void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
 {
 	if (semantics == spv::MemorySemanticsMaskNone)
 	{
 		return; //no-op
 	}
 	rr::Fence(MemoryOrder(semantics));
 }

 void SpirvShader::Yield(YieldResult res) const
 {
 	rr::Yield(RValue<Int>(int(res)));
 }

 }  // namespace sw
	// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "SpirvShader.hpp"

	#include "Reactor/Coroutine.hpp" // rr::Yield

	#include "ShaderCore.hpp"

	#include <spirv/unified1/spirv.hpp>

	#include <queue>

	namespace sw {

	SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
	{
	// Default to a Simple, this may change later.
	kind = Block::Simple;

	// Walk the instructions to find the last two of the block.
	InsnIterator insns[2];
	for (auto insn : *this)
	{
	insns[0] = insns[1];
	insns[1] = insn;
	}

	switch (insns[1].opcode())
	{
	case spv::OpBranch:
	branchInstruction = insns[1];
	outs.emplace(Block::ID(branchInstruction.word(1)));

	switch (insns[0].opcode())
	{
	case spv::OpLoopMerge:
	kind = Loop;
	mergeInstruction = insns[0];
	mergeBlock = Block::ID(mergeInstruction.word(1));
	continueTarget = Block::ID(mergeInstruction.word(2));
	break;

	default:
	kind = Block::Simple;
	break;
	}
	break;

	case spv::OpBranchConditional:
	branchInstruction = insns[1];
	outs.emplace(Block::ID(branchInstruction.word(2)));
	outs.emplace(Block::ID(branchInstruction.word(3)));

	switch (insns[0].opcode())
	{
	case spv::OpSelectionMerge:
	kind = StructuredBranchConditional;
	mergeInstruction = insns[0];
	mergeBlock = Block::ID(mergeInstruction.word(1));
	break;

	case spv::OpLoopMerge:
	kind = Loop;
	mergeInstruction = insns[0];
	mergeBlock = Block::ID(mergeInstruction.word(1));
	continueTarget = Block::ID(mergeInstruction.word(2));
	break;

	default:
	kind = UnstructuredBranchConditional;
	break;
	}
	break;

	case spv::OpSwitch:
	branchInstruction = insns[1];
	outs.emplace(Block::ID(branchInstruction.word(2)));
	for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
	{
	outs.emplace(Block::ID(branchInstruction.word(w)));
	}

	switch (insns[0].opcode())
	{
	case spv::OpSelectionMerge:
	kind = StructuredSwitch;
	mergeInstruction = insns[0];
	mergeBlock = Block::ID(mergeInstruction.word(1));
	break;

	default:
	kind = UnstructuredSwitch;
	break;
	}
	break;

	default:
	break;
	}
	}

	void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
	{
	if (reachable.count(id) == 0)
	{
	reachable.emplace(id);
	for (auto out : getBlock(id).outs)
	{
	TraverseReachableBlocks(out, reachable);
	}
	}
	}

	void SpirvShader::Function::AssignBlockFields()
	{
	Block::Set reachable;
	TraverseReachableBlocks(entry, reachable);

	for (auto &it : blocks)
	{
	auto &blockId = it.first;
	auto &block = it.second;
	if (reachable.count(blockId) > 0)
	{
	for (auto &outId : it.second.outs)
	{
	auto outIt = blocks.find(outId);
	ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
	auto &out = outIt->second;
	out.ins.emplace(blockId);
	}
	if (block.kind == Block::Loop)
	{
	auto mergeIt = blocks.find(block.mergeBlock);
	ASSERT_MSG(mergeIt != blocks.end(), "Loop block %d has a non-existent merge block %d", blockId.value(), block.mergeBlock.value());
	mergeIt->second.isLoopMerge = true;
	}
	}
	}
	}

	void SpirvShader::Function::ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const
	{
	auto block = getBlock(blockId);
	for (auto dep : block.ins)
	{
	if (block.kind != Block::Loop \|\| // if not a loop...
	!ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
	{
	f(dep);
	}
	}
	}

	bool SpirvShader::Function::ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
	{
	// TODO: Optimize: This can be cached on the block.
	Block::Set seen;
	seen.emplace(notPassingThrough);

	std::queue<Block::ID> pending;
	pending.emplace(from);

	while (pending.size() > 0)
	{
	auto id = pending.front();
	pending.pop();
	for (auto out : getBlock(id).outs)
	{
	if (seen.count(out) != 0) { continue; }
	if (out == to) { return true; }
	pending.emplace(out);
	}
	seen.emplace(id);
	}

	return false;
	}

	void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
	{
	addActiveLaneMaskEdge(block, to, mask & activeLaneMask());
	}

	void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
	{
	auto edge = Block::Edge{from, to};
	auto it = edgeActiveLaneMasks.find(edge);
	if (it == edgeActiveLaneMasks.end())
	{
	edgeActiveLaneMasks.emplace(edge, mask);
	}
	else
	{
	auto combined = it->second \| mask;
	edgeActiveLaneMasks.erase(edge);
	edgeActiveLaneMasks.emplace(edge, combined);
	}
	}

	RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
	{
	auto edge = Block::Edge{from, to};
	auto it = state->edgeActiveLaneMasks.find(edge);
	ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
	return it->second;
	}

	void SpirvShader::EmitBlocks(Block::ID id, EmitState state, Block::ID ignore / = 0 */) const
	{
	auto oldPending = state->pending;
	auto &function = getFunction(state->function);

	std::deque<Block::ID> pending;
	state->pending = &pending;
	pending.push_front(id);
	while (pending.size() > 0)
	{
	auto id = pending.front();

	auto const &block = function.getBlock(id);
	if (id == ignore)
	{
	pending.pop_front();
	continue;
	}

	// Ensure all dependency blocks have been generated.
	auto depsDone = true;
	function.ForeachBlockDependency(id, [&](Block::ID dep)
	{
	if (state->visited.count(dep) == 0)
	{
	state->pending->push_front(dep);
	depsDone = false;
	}
	});

	if (!depsDone)
	{
	continue;
	}

	pending.pop_front();

	state->block = id;

	switch (block.kind)
	{
	case Block::Simple:
	case Block::StructuredBranchConditional:
	case Block::UnstructuredBranchConditional:
	case Block::StructuredSwitch:
	case Block::UnstructuredSwitch:
	EmitNonLoop(state);
	break;

	case Block::Loop:
	EmitLoop(state);
	break;

	default:
	UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
	}
	}

	state->pending = oldPending;
	}

	void SpirvShader::EmitNonLoop(EmitState *state) const
	{
	auto &function = getFunction(state->function);
	auto blockId = state->block;
	auto block = function.getBlock(blockId);

	if (!state->visited.emplace(blockId).second)
	{
	return; // Already generated this block.
	}

	if (blockId != function.entry)
	{
	// Set the activeLaneMask.
	SIMD::Int activeLaneMask(0);
	for (auto in : block.ins)
	{
	auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
	activeLaneMask \|= inMask;
	}
	state->setActiveLaneMask(activeLaneMask);
	}

	EmitInstructions(block.begin(), block.end(), state);

	for (auto out : block.outs)
	{
	if (state->visited.count(out) == 0)
	{
	state->pending->push_back(out);
	}
	}
	}

	void SpirvShader::EmitLoop(EmitState *state) const
	{
	auto &function = getFunction(state->function);
	auto blockId = state->block;
	auto &block = function.getBlock(blockId);
	auto mergeBlockId = block.mergeBlock;
	auto &mergeBlock = function.getBlock(mergeBlockId);

	if (!state->visited.emplace(blockId).second)
	{
	return; // Already emitted this loop.
	}

	// Gather all the blocks that make up the loop.
	std::unordered_set<Block::ID> loopBlocks;
	loopBlocks.emplace(block.mergeBlock);
	function.TraverseReachableBlocks(blockId, loopBlocks);

	// incomingBlocks are block ins that are not back-edges.
	std::unordered_set<Block::ID> incomingBlocks;
	for (auto in : block.ins)
	{
	if (loopBlocks.count(in) == 0)
	{
	incomingBlocks.emplace(in);
	}
	}

	// Emit the loop phi instructions, and initialize them with a value from
	// the incoming blocks.
	for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
	{
	if (insn.opcode() == spv::OpPhi)
	{
	StorePhi(blockId, insn, state, incomingBlocks);
	}
	}

	// loopActiveLaneMask is the mask of lanes that are continuing to loop.
	// This is initialized with the incoming active lane masks.
	SIMD::Int loopActiveLaneMask = SIMD::Int(0);
	for (auto in : incomingBlocks)
	{
	loopActiveLaneMask \|= GetActiveLaneMaskEdge(state, in, blockId);
	}

	// mergeActiveLaneMasks contains edge lane masks for the merge block.
	// This is the union of all edge masks across all iterations of the loop.
	std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
	for (auto in : function.getBlock(mergeBlockId).ins)
	{
	mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
	}

	// Create the loop basic blocks
	auto headerBasicBlock = Nucleus::createBasicBlock();
	auto mergeBasicBlock = Nucleus::createBasicBlock();

	// Start emitting code inside the loop.
	Nucleus::createBr(headerBasicBlock);
	Nucleus::setInsertBlock(headerBasicBlock);

	// Load the active lane mask.
	state->setActiveLaneMask(loopActiveLaneMask);

	// Emit the non-phi loop header block's instructions.
	for (auto insn = block.begin(); insn != block.end(); insn++)
	{
	if (insn.opcode() == spv::OpPhi)
	{
	LoadPhi(insn, state);
	}
	else
	{
	EmitInstruction(insn, state);
	}
	}

	// Emit all blocks between the loop header and the merge block, but
	// don't emit the merge block yet.
	for (auto out : block.outs)
	{
	EmitBlocks(out, state, mergeBlockId);
	}

	// Restore current block id after emitting loop blocks.
	state->block = blockId;

	// Rebuild the loopActiveLaneMask from the loop back edges.
	loopActiveLaneMask = SIMD::Int(0);
	for (auto in : block.ins)
	{
	if (function.ExistsPath(blockId, in, mergeBlockId))
	{
	loopActiveLaneMask \|= GetActiveLaneMaskEdge(state, in, blockId);
	}
	}

	// Add active lanes to the merge lane mask.
	for (auto in : function.getBlock(mergeBlockId).ins)
	{
	auto edge = Block::Edge{in, mergeBlockId};
	auto it = state->edgeActiveLaneMasks.find(edge);
	if (it != state->edgeActiveLaneMasks.end())
	{
	mergeActiveLaneMasks[in] \|= it->second;
	}
	}

	// Update loop phi values.
	for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
	{
	if (insn.opcode() == spv::OpPhi)
	{
	StorePhi(blockId, insn, state, loopBlocks);
	}
	}

	// Use the [loop -> merge] active lane masks to update the phi values in
	// the merge block. We need to do this to handle divergent control flow
	// in the loop.
	//
	// Consider the following:
	//
	// int phi_source = 0;
	// for (uint i = 0; i < 4; i++)
	// {
	// phi_source = 0;
	// if (gl_GlobalInvocationID.x % 4 == i) // divergent control flow
	// {
	// phi_source = 42; // single lane assignment.
	// break; // activeLaneMask for [loop->merge] is active for a single lane.
	// }
	// // -- we are here --
	// }
	// // merge block
	// int phi = phi_source; // OpPhi
	//
	// In this example, with each iteration of the loop, phi_source will
	// only have a single lane assigned. However by 'phi' value in the merge
	// block needs to be assigned the union of all the per-lane assignments
	// of phi_source when that lane exited the loop.
	for (auto insn = mergeBlock.begin(); insn != mergeBlock.end(); insn++)
	{
	if (insn.opcode() == spv::OpPhi)
	{
	StorePhi(mergeBlockId, insn, state, loopBlocks);
	}
	}

	// Loop body now done.
	// If any lanes are still active, jump back to the loop header,
	// otherwise jump to the merge block.
	Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);

	// Continue emitting from the merge block.
	Nucleus::setInsertBlock(mergeBasicBlock);
	state->pending->push_back(mergeBlockId);
	for (auto it : mergeActiveLaneMasks)
	{
	state->addActiveLaneMaskEdge(it.first, mergeBlockId, it.second);
	}
	}

	SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
	{
	auto target = Block::ID(insn.word(1));
	state->addActiveLaneMaskEdge(state->block, target, state->activeLaneMask());
	return EmitResult::Terminator;
	}

	SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
	{
	auto &function = getFunction(state->function);
	auto block = function.getBlock(state->block);
	ASSERT(block.branchInstruction == insn);

	auto condId = Object::ID(block.branchInstruction.word(1));
	auto trueBlockId = Block::ID(block.branchInstruction.word(2));
	auto falseBlockId = Block::ID(block.branchInstruction.word(3));

	auto cond = GenericValue(this, state, condId);
	ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");

	// TODO: Optimize for case where all lanes take same path.

	state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
	state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));

	return EmitResult::Terminator;
	}

	SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
	{
	auto &function = getFunction(state->function);
	auto block = function.getBlock(state->block);
	ASSERT(block.branchInstruction == insn);

	auto selId = Object::ID(block.branchInstruction.word(1));

	auto sel = GenericValue(this, state, selId);
	ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");

	auto numCases = (block.branchInstruction.wordCount() - 3) / 2;

	// TODO: Optimize for case where all lanes take same path.

	SIMD::Int defaultLaneMask = state->activeLaneMask();

	// Gather up the case label matches and calculate defaultLaneMask.
	std::vector<RValue<SIMD::Int>> caseLabelMatches;
	caseLabelMatches.reserve(numCases);
	for (uint32_t i = 0; i < numCases; i++)
	{
	auto label = block.branchInstruction.word(i * 2 + 3);
	auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
	auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
	state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
	defaultLaneMask &= ~caseLabelMatch;
	}

	auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
	state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);

	return EmitResult::Terminator;
	}

	SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
	{
	// TODO: Log something in this case?
	state->setActiveLaneMask(SIMD::Int(0));
	return EmitResult::Terminator;
	}

	SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
	{
	state->setActiveLaneMask(SIMD::Int(0));
	return EmitResult::Terminator;
	}

	SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
	{
	state->routine->killMask \|= SignMask(state->activeLaneMask());
	state->setActiveLaneMask(SIMD::Int(0));
	return EmitResult::Terminator;
	}

	SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
	{
	auto functionId = Function::ID(insn.word(3));
	const auto& functionIt = functions.find(functionId);
	ASSERT(functionIt != functions.end());
	auto& function = functionIt->second;

	// TODO(b/141246700): Add full support for spv::OpFunctionCall
	// The only supported function is a single OpKill wrapped in a
	// function, as a result of the "wrap OpKill" SPIRV-Tools pass
	ASSERT(function.blocks.size() == 1);
	spv::Op wrapOpKill[] = { spv::OpLabel, spv::OpKill };

	for (auto block : function.blocks)
	{
	int insnNumber = 0;
	for (auto blockInsn : block.second)
	{
	if (insnNumber > 1)
	{
	UNIMPLEMENTED("Function block number of instructions: %d", insnNumber);
	return EmitResult::Continue;
	}
	if (blockInsn.opcode() != wrapOpKill[insnNumber++])
	{
	UNIMPLEMENTED("Function block instruction %d : %s", insnNumber - 1, OpcodeName(blockInsn.opcode()).c_str());
	return EmitResult::Continue;
	}
	if (blockInsn.opcode() == spv::OpKill)
	{
	EmitInstruction(blockInsn, state);
	}
	}
	}

	return EmitResult::Continue;
	}

	SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
	{
	auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
	auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
	// TODO: We probably want to consider the memory scope here. For now,
	// just always emit the full fence.
	Fence(semantics);

	switch (executionScope)
	{
	case spv::ScopeWorkgroup:
	Yield(YieldResult::ControlBarrier);
	break;
	case spv::ScopeSubgroup:
	break;
	default:
	// See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
	UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
	break;
	}

	return EmitResult::Continue;
	}

	SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
	{
	auto &function = getFunction(state->function);
	auto currentBlock = function.getBlock(state->block);
	if (!currentBlock.isLoopMerge)
	{
	// If this is a loop merge block, then don't attempt to update the
	// phi values from the ins. EmitLoop() has had to take special care
	// of this phi in order to correctly deal with divergent lanes.
	StorePhi(state->block, insn, state, currentBlock.ins);
	}
	LoadPhi(insn, state);
	return EmitResult::Continue;
	}

	void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
	{
	auto typeId = Type::ID(insn.word(1));
	auto type = getType(typeId);
	auto objectId = Object::ID(insn.word(2));

	auto storageIt = state->routine->phis.find(objectId);
	ASSERT(storageIt != state->routine->phis.end());
	auto &storage = storageIt->second;

	auto &dst = state->createIntermediate(objectId, type.sizeInComponents);
	for(uint32_t i = 0; i < type.sizeInComponents; i++)
	{
	dst.move(i, storage[i]);
	}
	}

	void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
	{
	auto typeId = Type::ID(insn.word(1));
	auto type = getType(typeId);
	auto objectId = Object::ID(insn.word(2));

	auto storageIt = state->routine->phis.find(objectId);
	ASSERT(storageIt != state->routine->phis.end());
	auto &storage = storageIt->second;

	for (uint32_t w = 3; w < insn.wordCount(); w += 2)
	{
	auto varId = Object::ID(insn.word(w + 0));
	auto blockId = Block::ID(insn.word(w + 1));

	if (filter.count(blockId) == 0)
	{
	continue;
	}

	auto mask = GetActiveLaneMaskEdge(state, blockId, currentBlock);
	auto in = GenericValue(this, state, varId);

	for (uint32_t i = 0; i < type.sizeInComponents; i++)
	{
	storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) \| (in.Int(i) & mask));
	}
	}
	}

	void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
	{
	if (semantics == spv::MemorySemanticsMaskNone)
	{
	return; //no-op
	}
	rr::Fence(MemoryOrder(semantics));
	}

	void SpirvShader::Yield(YieldResult res) const
	{
	rr::Yield(RValue<Int>(int(res)));
	}

	} // namespace sw