blob: f0c7d610e58ff58117d63ba27c1fa4db9007c091 [file] [log] [blame]
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ComputeProgram.hpp"
#include "Constants.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
namespace
{
enum { X, Y, Z };
} // anonymous namespace
namespace sw
{
ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
: data(Arg<0>()),
routine(pipelineLayout),
shader(shader),
pipelineLayout(pipelineLayout),
descriptorSets(descriptorSets)
{
}
ComputeProgram::~ComputeProgram()
{
}
void ComputeProgram::generate()
{
shader->emitProlog(&routine);
emit();
shader->emitEpilog(&routine);
}
void ComputeProgram::emit()
{
routine.descriptorSets = data + OFFSET(Data, descriptorSets);
routine.descriptorDynamicOffsets = data + OFFSET(Data, descriptorDynamicOffsets);
routine.pushConstants = data + OFFSET(Data, pushConstants);
routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(Data, constants));
auto &modes = shader->getModes();
int localSize[3] = {modes.WorkgroupSizeX, modes.WorkgroupSizeY, modes.WorkgroupSizeZ};
const int subgroupSize = SIMD::Width;
// Total number of invocations required to execute this workgroup.
int numInvocations = localSize[X] * localSize[Y] * localSize[Z];
Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
Int4 workgroupID = *Pointer<Int4>(data + OFFSET(Data, workgroupID));
Int4 workgroupSize = Int4(localSize[X], localSize[Y], localSize[Z], 0);
Int numSubgroups = (numInvocations + subgroupSize - 1) / subgroupSize;
setInputBuiltin(spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component)));
}
});
setInputBuiltin(spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
As<SIMD::Float>(SIMD::Int(Extract(workgroupID, component)));
}
});
setInputBuiltin(spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component)));
}
});
setInputBuiltin(spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(numSubgroups));
});
setInputBuiltin(spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupSize));
});
setInputBuiltin(spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
});
For(Int subgroupIndex = 0, subgroupIndex < numSubgroups, subgroupIndex++)
{
// TODO: Replace SIMD::Int(0, 1, 2, 3) with SIMD-width equivalent
auto localInvocationIndex = SIMD::Int(subgroupIndex * SIMD::Width) + SIMD::Int(0, 1, 2, 3);
// Disable lanes where (invocationIDs >= numInvocations)
auto activeLaneMask = CmpLT(localInvocationIndex, SIMD::Int(numInvocations));
SIMD::Int localInvocationID[3];
{
SIMD::Int idx = localInvocationIndex;
localInvocationID[Z] = idx / SIMD::Int(localSize[X] * localSize[Y]);
idx -= localInvocationID[Z] * SIMD::Int(localSize[X] * localSize[Y]); // modulo
localInvocationID[Y] = idx / SIMD::Int(localSize[X]);
idx -= localInvocationID[Y] * SIMD::Int(localSize[X]); // modulo
localInvocationID[X] = idx;
}
setInputBuiltin(spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex);
});
setInputBuiltin(spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupIndex));
});
setInputBuiltin(spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] = As<SIMD::Float>(localInvocationID[component]);
}
});
setInputBuiltin(spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
auto localBase = workgroupID * workgroupSize;
for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
auto globalInvocationID = SIMD::Int(Extract(localBase, component)) + localInvocationID[component];
value[builtin.FirstComponent + component] = As<SIMD::Float>(globalInvocationID);
}
});
// Process numLanes of the workgroup.
shader->emit(&routine, activeLaneMask, descriptorSets);
}
}
void ComputeProgram::setInputBuiltin(spv::BuiltIn id, std::function<void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)> cb)
{
auto it = shader->inputBuiltins.find(id);
if (it != shader->inputBuiltins.end())
{
const auto& builtin = it->second;
cb(builtin, routine.getVariable(builtin.Id));
}
}
void ComputeProgram::run(
Routine *routine,
vk::DescriptorSet::Bindings const &descriptorSets,
vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
auto runWorkgroup = (void(*)(void*))(routine->getEntry());
Data data;
data.descriptorSets = descriptorSets;
data.descriptorDynamicOffsets = descriptorDynamicOffsets;
data.numWorkgroups[X] = groupCountX;
data.numWorkgroups[Y] = groupCountY;
data.numWorkgroups[Z] = groupCountZ;
data.numWorkgroups[3] = 0;
data.pushConstants = pushConstants;
data.constants = &sw::constants;
// TODO(bclayton): Split work across threads.
for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
{
data.workgroupID[Z] = groupZ;
for (uint32_t groupY = 0; groupY < groupCountY; groupY++)
{
data.workgroupID[Y] = groupY;
for (uint32_t groupX = 0; groupX < groupCountX; groupX++)
{
data.workgroupID[X] = groupX;
runWorkgroup(&data);
}
}
}
}
}