blob: 117e8f8f6d178513a263a3e23bff2526772c4ded [file] [log] [blame]
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "SpirvShader.hpp"
#include "Reactor/Coroutine.hpp" // rr::Yield
#include "ShaderCore.hpp"
#include <spirv/unified1/spirv.hpp>
#include <queue>
namespace sw {
SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
{
// Default to a Simple, this may change later.
kind = Block::Simple;
// Walk the instructions to find the last two of the block.
InsnIterator insns[2];
for (auto insn : *this)
{
insns[0] = insns[1];
insns[1] = insn;
}
switch (insns[1].opcode())
{
case spv::OpBranch:
branchInstruction = insns[1];
outs.emplace(Block::ID(branchInstruction.word(1)));
switch (insns[0].opcode())
{
case spv::OpLoopMerge:
kind = Loop;
mergeInstruction = insns[0];
mergeBlock = Block::ID(mergeInstruction.word(1));
continueTarget = Block::ID(mergeInstruction.word(2));
break;
default:
kind = Block::Simple;
break;
}
break;
case spv::OpBranchConditional:
branchInstruction = insns[1];
outs.emplace(Block::ID(branchInstruction.word(2)));
outs.emplace(Block::ID(branchInstruction.word(3)));
switch (insns[0].opcode())
{
case spv::OpSelectionMerge:
kind = StructuredBranchConditional;
mergeInstruction = insns[0];
mergeBlock = Block::ID(mergeInstruction.word(1));
break;
case spv::OpLoopMerge:
kind = Loop;
mergeInstruction = insns[0];
mergeBlock = Block::ID(mergeInstruction.word(1));
continueTarget = Block::ID(mergeInstruction.word(2));
break;
default:
kind = UnstructuredBranchConditional;
break;
}
break;
case spv::OpSwitch:
branchInstruction = insns[1];
outs.emplace(Block::ID(branchInstruction.word(2)));
for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
{
outs.emplace(Block::ID(branchInstruction.word(w)));
}
switch (insns[0].opcode())
{
case spv::OpSelectionMerge:
kind = StructuredSwitch;
mergeInstruction = insns[0];
mergeBlock = Block::ID(mergeInstruction.word(1));
break;
default:
kind = UnstructuredSwitch;
break;
}
break;
default:
break;
}
}
void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
{
if (reachable.count(id) == 0)
{
reachable.emplace(id);
for (auto out : getBlock(id).outs)
{
TraverseReachableBlocks(out, reachable);
}
}
}
void SpirvShader::Function::AssignBlockFields()
{
Block::Set reachable;
TraverseReachableBlocks(entry, reachable);
for (auto &it : blocks)
{
auto &blockId = it.first;
auto &block = it.second;
if (reachable.count(blockId) > 0)
{
for (auto &outId : it.second.outs)
{
auto outIt = blocks.find(outId);
ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
auto &out = outIt->second;
out.ins.emplace(blockId);
}
if (block.kind == Block::Loop)
{
auto mergeIt = blocks.find(block.mergeBlock);
ASSERT_MSG(mergeIt != blocks.end(), "Loop block %d has a non-existent merge block %d", blockId.value(), block.mergeBlock.value());
mergeIt->second.isLoopMerge = true;
}
}
}
}
void SpirvShader::Function::ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const
{
auto block = getBlock(blockId);
for (auto dep : block.ins)
{
if (block.kind != Block::Loop || // if not a loop...
!ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
{
f(dep);
}
}
}
bool SpirvShader::Function::ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
{
// TODO: Optimize: This can be cached on the block.
Block::Set seen;
seen.emplace(notPassingThrough);
std::queue<Block::ID> pending;
pending.emplace(from);
while (pending.size() > 0)
{
auto id = pending.front();
pending.pop();
for (auto out : getBlock(id).outs)
{
if (seen.count(out) != 0) { continue; }
if (out == to) { return true; }
pending.emplace(out);
}
seen.emplace(id);
}
return false;
}
void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
{
addActiveLaneMaskEdge(block, to, mask & activeLaneMask());
}
void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
{
auto edge = Block::Edge{from, to};
auto it = edgeActiveLaneMasks.find(edge);
if (it == edgeActiveLaneMasks.end())
{
edgeActiveLaneMasks.emplace(edge, mask);
}
else
{
auto combined = it->second | mask;
edgeActiveLaneMasks.erase(edge);
edgeActiveLaneMasks.emplace(edge, combined);
}
}
RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
{
auto edge = Block::Edge{from, to};
auto it = state->edgeActiveLaneMasks.find(edge);
ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
return it->second;
}
void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
{
auto oldPending = state->pending;
auto &function = getFunction(state->function);
std::deque<Block::ID> pending;
state->pending = &pending;
pending.push_front(id);
while (pending.size() > 0)
{
auto id = pending.front();
auto const &block = function.getBlock(id);
if (id == ignore)
{
pending.pop_front();
continue;
}
// Ensure all dependency blocks have been generated.
auto depsDone = true;
function.ForeachBlockDependency(id, [&](Block::ID dep)
{
if (state->visited.count(dep) == 0)
{
state->pending->push_front(dep);
depsDone = false;
}
});
if (!depsDone)
{
continue;
}
pending.pop_front();
state->block = id;
switch (block.kind)
{
case Block::Simple:
case Block::StructuredBranchConditional:
case Block::UnstructuredBranchConditional:
case Block::StructuredSwitch:
case Block::UnstructuredSwitch:
EmitNonLoop(state);
break;
case Block::Loop:
EmitLoop(state);
break;
default:
UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
}
}
state->pending = oldPending;
}
void SpirvShader::EmitNonLoop(EmitState *state) const
{
auto &function = getFunction(state->function);
auto blockId = state->block;
auto block = function.getBlock(blockId);
if (!state->visited.emplace(blockId).second)
{
return; // Already generated this block.
}
if (blockId != function.entry)
{
// Set the activeLaneMask.
SIMD::Int activeLaneMask(0);
for (auto in : block.ins)
{
auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
activeLaneMask |= inMask;
}
state->setActiveLaneMask(activeLaneMask);
}
EmitInstructions(block.begin(), block.end(), state);
for (auto out : block.outs)
{
if (state->visited.count(out) == 0)
{
state->pending->push_back(out);
}
}
}
void SpirvShader::EmitLoop(EmitState *state) const
{
auto &function = getFunction(state->function);
auto blockId = state->block;
auto &block = function.getBlock(blockId);
auto mergeBlockId = block.mergeBlock;
auto &mergeBlock = function.getBlock(mergeBlockId);
if (!state->visited.emplace(blockId).second)
{
return; // Already emitted this loop.
}
// Gather all the blocks that make up the loop.
std::unordered_set<Block::ID> loopBlocks;
loopBlocks.emplace(block.mergeBlock);
function.TraverseReachableBlocks(blockId, loopBlocks);
// incomingBlocks are block ins that are not back-edges.
std::unordered_set<Block::ID> incomingBlocks;
for (auto in : block.ins)
{
if (loopBlocks.count(in) == 0)
{
incomingBlocks.emplace(in);
}
}
// Emit the loop phi instructions, and initialize them with a value from
// the incoming blocks.
for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
{
if (insn.opcode() == spv::OpPhi)
{
StorePhi(blockId, insn, state, incomingBlocks);
}
}
// loopActiveLaneMask is the mask of lanes that are continuing to loop.
// This is initialized with the incoming active lane masks.
SIMD::Int loopActiveLaneMask = SIMD::Int(0);
for (auto in : incomingBlocks)
{
loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
}
// mergeActiveLaneMasks contains edge lane masks for the merge block.
// This is the union of all edge masks across all iterations of the loop.
std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
for (auto in : function.getBlock(mergeBlockId).ins)
{
mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
}
// Create the loop basic blocks
auto headerBasicBlock = Nucleus::createBasicBlock();
auto mergeBasicBlock = Nucleus::createBasicBlock();
// Start emitting code inside the loop.
Nucleus::createBr(headerBasicBlock);
Nucleus::setInsertBlock(headerBasicBlock);
// Load the active lane mask.
state->setActiveLaneMask(loopActiveLaneMask);
// Emit the non-phi loop header block's instructions.
for (auto insn = block.begin(); insn != block.end(); insn++)
{
if (insn.opcode() == spv::OpPhi)
{
LoadPhi(insn, state);
}
else
{
EmitInstruction(insn, state);
}
}
// Emit all blocks between the loop header and the merge block, but
// don't emit the merge block yet.
for (auto out : block.outs)
{
EmitBlocks(out, state, mergeBlockId);
}
// Restore current block id after emitting loop blocks.
state->block = blockId;
// Rebuild the loopActiveLaneMask from the loop back edges.
loopActiveLaneMask = SIMD::Int(0);
for (auto in : block.ins)
{
if (function.ExistsPath(blockId, in, mergeBlockId))
{
loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
}
}
// Add active lanes to the merge lane mask.
for (auto in : function.getBlock(mergeBlockId).ins)
{
auto edge = Block::Edge{in, mergeBlockId};
auto it = state->edgeActiveLaneMasks.find(edge);
if (it != state->edgeActiveLaneMasks.end())
{
mergeActiveLaneMasks[in] |= it->second;
}
}
// Update loop phi values.
for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
{
if (insn.opcode() == spv::OpPhi)
{
StorePhi(blockId, insn, state, loopBlocks);
}
}
// Use the [loop -> merge] active lane masks to update the phi values in
// the merge block. We need to do this to handle divergent control flow
// in the loop.
//
// Consider the following:
//
// int phi_source = 0;
// for (uint i = 0; i < 4; i++)
// {
// phi_source = 0;
// if (gl_GlobalInvocationID.x % 4 == i) // divergent control flow
// {
// phi_source = 42; // single lane assignment.
// break; // activeLaneMask for [loop->merge] is active for a single lane.
// }
// // -- we are here --
// }
// // merge block
// int phi = phi_source; // OpPhi
//
// In this example, with each iteration of the loop, phi_source will
// only have a single lane assigned. However by 'phi' value in the merge
// block needs to be assigned the union of all the per-lane assignments
// of phi_source when that lane exited the loop.
for (auto insn = mergeBlock.begin(); insn != mergeBlock.end(); insn++)
{
if (insn.opcode() == spv::OpPhi)
{
StorePhi(mergeBlockId, insn, state, loopBlocks);
}
}
// Loop body now done.
// If any lanes are still active, jump back to the loop header,
// otherwise jump to the merge block.
Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
// Continue emitting from the merge block.
Nucleus::setInsertBlock(mergeBasicBlock);
state->pending->push_back(mergeBlockId);
for (auto it : mergeActiveLaneMasks)
{
state->addActiveLaneMaskEdge(it.first, mergeBlockId, it.second);
}
}
SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
{
auto target = Block::ID(insn.word(1));
state->addActiveLaneMaskEdge(state->block, target, state->activeLaneMask());
return EmitResult::Terminator;
}
SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
{
auto &function = getFunction(state->function);
auto block = function.getBlock(state->block);
ASSERT(block.branchInstruction == insn);
auto condId = Object::ID(block.branchInstruction.word(1));
auto trueBlockId = Block::ID(block.branchInstruction.word(2));
auto falseBlockId = Block::ID(block.branchInstruction.word(3));
auto cond = GenericValue(this, state, condId);
ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
// TODO: Optimize for case where all lanes take same path.
state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
return EmitResult::Terminator;
}
SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
{
auto &function = getFunction(state->function);
auto block = function.getBlock(state->block);
ASSERT(block.branchInstruction == insn);
auto selId = Object::ID(block.branchInstruction.word(1));
auto sel = GenericValue(this, state, selId);
ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");
auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
// TODO: Optimize for case where all lanes take same path.
SIMD::Int defaultLaneMask = state->activeLaneMask();
// Gather up the case label matches and calculate defaultLaneMask.
std::vector<RValue<SIMD::Int>> caseLabelMatches;
caseLabelMatches.reserve(numCases);
for (uint32_t i = 0; i < numCases; i++)
{
auto label = block.branchInstruction.word(i * 2 + 3);
auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
defaultLaneMask &= ~caseLabelMatch;
}
auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
return EmitResult::Terminator;
}
SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
{
// TODO: Log something in this case?
state->setActiveLaneMask(SIMD::Int(0));
return EmitResult::Terminator;
}
SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
{
state->setActiveLaneMask(SIMD::Int(0));
return EmitResult::Terminator;
}
SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
{
state->routine->killMask |= SignMask(state->activeLaneMask());
state->setActiveLaneMask(SIMD::Int(0));
return EmitResult::Terminator;
}
SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
{
auto functionId = Function::ID(insn.word(3));
const auto& functionIt = functions.find(functionId);
ASSERT(functionIt != functions.end());
auto& function = functionIt->second;
// TODO(b/141246700): Add full support for spv::OpFunctionCall
// The only supported function is a single OpKill wrapped in a
// function, as a result of the "wrap OpKill" SPIRV-Tools pass
ASSERT(function.blocks.size() == 1);
spv::Op wrapOpKill[] = { spv::OpLabel, spv::OpKill };
for (auto block : function.blocks)
{
int insnNumber = 0;
for (auto blockInsn : block.second)
{
if (insnNumber > 1)
{
UNIMPLEMENTED("Function block number of instructions: %d", insnNumber);
return EmitResult::Continue;
}
if (blockInsn.opcode() != wrapOpKill[insnNumber++])
{
UNIMPLEMENTED("Function block instruction %d : %s", insnNumber - 1, OpcodeName(blockInsn.opcode()).c_str());
return EmitResult::Continue;
}
if (blockInsn.opcode() == spv::OpKill)
{
EmitInstruction(blockInsn, state);
}
}
}
return EmitResult::Continue;
}
SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
{
auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
// TODO: We probably want to consider the memory scope here. For now,
// just always emit the full fence.
Fence(semantics);
switch (executionScope)
{
case spv::ScopeWorkgroup:
Yield(YieldResult::ControlBarrier);
break;
case spv::ScopeSubgroup:
break;
default:
// See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
break;
}
return EmitResult::Continue;
}
SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
{
auto &function = getFunction(state->function);
auto currentBlock = function.getBlock(state->block);
if (!currentBlock.isLoopMerge)
{
// If this is a loop merge block, then don't attempt to update the
// phi values from the ins. EmitLoop() has had to take special care
// of this phi in order to correctly deal with divergent lanes.
StorePhi(state->block, insn, state, currentBlock.ins);
}
LoadPhi(insn, state);
return EmitResult::Continue;
}
void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
{
auto typeId = Type::ID(insn.word(1));
auto type = getType(typeId);
auto objectId = Object::ID(insn.word(2));
auto storageIt = state->routine->phis.find(objectId);
ASSERT(storageIt != state->routine->phis.end());
auto &storage = storageIt->second;
auto &dst = state->createIntermediate(objectId, type.sizeInComponents);
for(uint32_t i = 0; i < type.sizeInComponents; i++)
{
dst.move(i, storage[i]);
}
}
void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
{
auto typeId = Type::ID(insn.word(1));
auto type = getType(typeId);
auto objectId = Object::ID(insn.word(2));
auto storageIt = state->routine->phis.find(objectId);
ASSERT(storageIt != state->routine->phis.end());
auto &storage = storageIt->second;
for (uint32_t w = 3; w < insn.wordCount(); w += 2)
{
auto varId = Object::ID(insn.word(w + 0));
auto blockId = Block::ID(insn.word(w + 1));
if (filter.count(blockId) == 0)
{
continue;
}
auto mask = GetActiveLaneMaskEdge(state, blockId, currentBlock);
auto in = GenericValue(this, state, varId);
for (uint32_t i = 0; i < type.sizeInComponents; i++)
{
storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
}
}
}
void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
{
if (semantics == spv::MemorySemanticsMaskNone)
{
return; //no-op
}
rr::Fence(MemoryOrder(semantics));
}
void SpirvShader::Yield(YieldResult res) const
{
rr::Yield(RValue<Int>(int(res)));
}
} // namespace sw