Enable fragmentStoresAndAtomics
A few changes were made to make this work:
- Added a storesAndAtomicsMask, which contains cMask (coverage),
  plus sMask (stencil) and zMask (depth) when early fragment tests
  are enabled. Since this mask is only consulted by stores and
  atomics, folding the coverage into it is safe: it gates all
  atomic operations and all store operations into storage buffers
  and images. A scalar sketch of the masking follows this list.
- Added support for spv::BuiltInHelperInvocation, derived as the
  inverse of the per-lane coverage mask.
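
For illustration only, a minimal scalar sketch of the masking scheme
(plain C++ standing in for the Reactor SIMD::Int code; QuadMasks and
the function names below are hypothetical, the logic mirrors
PixelProgram::maskAny in the diff):

  #include <cstdint>
  #include <cstdio>

  // One bit per lane (pixel of the 2x2 quad), for each of up to 4 samples.
  struct QuadMasks { int32_t cMask[4], sMask[4], zMask[4]; };

  // Collapse per-sample coverage/stencil/depth into one boolean per lane,
  // mirroring PixelProgram::maskAny(cMask, sMask, zMask): a lane may perform
  // stores and atomics if any of its samples passes all three tests.
  static void storesAndAtomicsMask(const QuadMasks &m, unsigned multiSample,
                                   int32_t out[4])
  {
      int32_t maskUnion = m.cMask[0] & m.sMask[0] & m.zMask[0];
      for(unsigned i = 1; i < multiSample; i++)
      {
          maskUnion |= (m.cMask[i] & m.sMask[i] & m.zMask[i]);
      }

      for(int lane = 0; lane < 4; lane++)
      {
          int32_t bit = (maskUnion >> lane) & 1;  // this lane's bit of the union
          out[lane] = -bit;                       // 0 -> 0x00000000, 1 -> 0xFFFFFFFF
      }
      // The Reactor code broadcasts the bits with a vector shift instead:
      // ((mask & Int4(1,2,4,8)) << Int4(31,30,29,28)) >> Int4(31).
  }

  int main()
  {
      QuadMasks m = {};
      m.cMask[0] = 0b0101;               // sample 0: lanes 0 and 2 covered
      m.sMask[0] = m.zMask[0] = 0b1111;  // stencil/depth pass everywhere
      int32_t mask[4];
      storesAndAtomicsMask(m, 1, mask);  // -> {~0, 0, ~0, 0}
      std::printf("%08x %08x %08x %08x\n", (unsigned)mask[0], (unsigned)mask[1],
                  (unsigned)mask[2], (unsigned)mask[3]);
      return 0;
  }

A helper invocation is a lane with no covered samples, i.e. the inverse
of maskAny(cMask). Before a store or atomic into Uniform, StorageBuffer
or Image storage, the active lane mask is ANDed with the
storesAndAtomicsMask, so helper invocations and uncovered samples never
become visible in memory.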
Bug b/140294254
Test: dEQP-VK.*
Change-Id: I42b97a766ddfe331bb2767d80d4360104a221482
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34114
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index a209339..2a86cc4 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -188,7 +188,7 @@
setSubgroupBuiltins(data, routine, workgroupID, localInvocationIndex, subgroupIndex);
- shader->emit(routine, activeLaneMask, descriptorSets);
+ shader->emit(routine, activeLaneMask, activeLaneMask, descriptorSets);
}
}
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index d9a7bb5..d38e245 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -20,7 +20,43 @@
namespace sw
{
- void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
+ // Union all cMask and return it as 4 booleans
+ Int4 PixelProgram::maskAny(Int cMask[4]) const
+ {
+ // See if at least 1 sample is used
+ Int maskUnion = cMask[0];
+ for(auto i = 1u; i < state.multiSample; i++)
+ {
+ maskUnion |= cMask[i];
+ }
+
+ // Convert to 4 booleans
+ Int4 laneBits = Int4(1, 2, 4, 8);
+ Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
+ Int4 mask(maskUnion);
+ mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
+ return mask;
+ }
+
+ // Union all cMask/sMask/zMask and return it as 4 booleans
+ Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const
+ {
+ // See if at least 1 sample is used
+ Int maskUnion = cMask[0] & sMask[0] & zMask[0];
+ for(auto i = 1u; i < state.multiSample; i++)
+ {
+ maskUnion |= (cMask[i] & sMask[i] & zMask[i]);
+ }
+
+ // Convert to 4 booleans
+ Int4 laneBits = Int4(1, 2, 4, 8);
+ Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
+ Int4 mask(maskUnion);
+ mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
+ return mask;
+ }
+
+ void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
{
routine.setImmutableInputBuiltins(spirvShader);
@@ -54,12 +90,18 @@
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
});
+ routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
+ {
+ assert(builtin.SizeInComponents == 1);
+ value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask));
+ });
+
routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1);
routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1);
routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
}
- void PixelProgram::applyShader(Int cMask[4])
+ void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
{
routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
@@ -96,9 +138,10 @@
// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
// handled separately, through the cMask.
auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
+ auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask);
routine.killMask = 0;
- spirvShader->emit(&routine, activeLaneMask, descriptorSets);
+ spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
spirvShader->emitEpilog(&routine);
for(int i = 0; i < RENDERTARGETS; i++)
diff --git a/src/Pipeline/PixelProgram.hpp b/src/Pipeline/PixelProgram.hpp
index 9f6d14f..3555aee 100644
--- a/src/Pipeline/PixelProgram.hpp
+++ b/src/Pipeline/PixelProgram.hpp
@@ -34,8 +34,8 @@
virtual ~PixelProgram() {}
protected:
- virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w);
- virtual void applyShader(Int cMask[4]);
+ virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]);
+ virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]);
virtual Bool alphaTest(Int cMask[4]);
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
@@ -46,6 +46,8 @@
// Raster operations
void clampColor(Vector4f oC[RENDERTARGETS]);
+ Int4 maskAny(Int cMask[4]) const;
+ Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const;
Float4 linearToSRGB(const Float4 &x);
};
}
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index a06b4d1..186a44a 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -53,7 +53,7 @@
void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
{
// TODO: consider shader which modifies sample mask in general
- const bool earlyDepthTest = !spirvShader || (!spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
+ const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
Int zMask[4]; // Depth mask
Int sMask[4]; // Stencil mask
@@ -161,14 +161,15 @@
}
}
- setBuiltins(x, y, z, w);
+ setBuiltins(x, y, z, w, cMask);
}
Bool alphaPass = true;
if (spirvShader)
{
- applyShader(cMask);
+ bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests);
+ applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask);
}
alphaPass = alphaTest(cMask);
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index c4d2423..b9486af 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -43,8 +43,8 @@
// Depth output
Float4 oDepth;
- virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) = 0;
- virtual void applyShader(Int cMask[4]) = 0;
+ virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) = 0;
+ virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0;
virtual Bool alphaTest(Int cMask[4]) = 0;
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0;
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index d47fd87..63d403c 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1367,6 +1367,19 @@
}
}
+ bool SpirvShader::StoresInHelperInvocation(spv::StorageClass storageClass)
+ {
+ switch (storageClass)
+ {
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ case spv::StorageClassImage:
+ return false;
+ default:
+ return true;
+ }
+ }
+
bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass)
{
switch (storageClass)
@@ -2136,9 +2149,9 @@
}
}
- void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
+ void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const
{
- EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel);
+ EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, executionModel);
// Emit everything up to the first label
// TODO: Separate out dispatch of block from non-block instructions?
@@ -2975,6 +2988,12 @@
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
+ SIMD::Int mask = state->activeLaneMask();
+ if (!StoresInHelperInvocation(pointerTy.storageClass))
+ {
+ mask = mask & state->storesAndAtomicsMask();
+ }
+
if (object.kind == Object::Kind::Constant)
{
// Constant source data.
@@ -2983,7 +3002,7 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder);
+ SIMD::Store(p, SIMD::Float(src[i]), robustness, mask, atomic, memoryOrder);
});
}
else
@@ -2994,7 +3013,7 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder);
+ SIMD::Store(p, src.Float(i), robustness, mask, atomic, memoryOrder);
});
}
@@ -5843,10 +5862,11 @@
auto ptr = state->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets();
- SIMD::UInt x;
+ SIMD::UInt x(0);
+ auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
for (int j = 0; j < SIMD::Width; j++)
{
- If(Extract(state->activeLaneMask(), j) != 0)
+ If(Extract(mask, j) != 0)
{
auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value, j);
@@ -5914,10 +5934,11 @@
auto ptr = state->getPointer(insn.word(3));
auto ptrOffsets = ptr.offsets();
- SIMD::UInt x;
+ SIMD::UInt x(0);
+ auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
for (int j = 0; j < SIMD::Width; j++)
{
- If(Extract(state->activeLaneMask(), j) != 0)
+ If(Extract(mask, j) != 0)
{
auto offset = Extract(ptrOffsets, j);
auto laneValue = Extract(value.UInt(0), j);
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 0cc4373..a48e3b3 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -889,7 +889,7 @@
std::vector<InterfaceComponent> outputs;
void emitProlog(SpirvRoutine *routine) const;
- void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
+ void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
void emitEpilog(SpirvRoutine *routine) const;
using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
@@ -967,6 +967,9 @@
//
static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
static bool IsExplicitLayout(spv::StorageClass storageClass);
+
+ // Output storage buffers and images should not be affected by helper invocations
+ static bool StoresInHelperInvocation(spv::StorageClass storageClass);
template<typename F>
int VisitInterfaceInner(Type::ID id, Decorations d, F f) const;
@@ -991,12 +994,14 @@
EmitState(SpirvRoutine *routine,
Function::ID function,
RValue<SIMD::Int> activeLaneMask,
+ RValue<SIMD::Int> storesAndAtomicsMask,
const vk::DescriptorSet::Bindings &descriptorSets,
bool robustBufferAccess,
spv::ExecutionModel executionModel)
: routine(routine),
function(function),
activeLaneMaskValue(activeLaneMask.value),
+ storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
descriptorSets(descriptorSets),
robustBufferAccess(robustBufferAccess),
executionModel(executionModel)
@@ -1010,6 +1015,12 @@
return RValue<SIMD::Int>(activeLaneMaskValue);
}
+ RValue<SIMD::Int> storesAndAtomicsMask() const
+ {
+ ASSERT(storesAndAtomicsMaskValue != nullptr);
+ return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
+ }
+
void setActiveLaneMask(RValue<SIMD::Int> mask)
{
activeLaneMaskValue = mask.value;
@@ -1030,6 +1041,7 @@
Function::ID function; // The current function being built.
Block::ID block; // The current block being built.
rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
+ rr::Value *storesAndAtomicsMaskValue = nullptr; // The current stores and atomics mask.
Block::Set visited; // Blocks already built.
std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
std::deque<Block::ID> *pending;
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index c7f020c..dedf800 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -80,7 +80,7 @@
}
auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
- spirvShader->emit(&routine, activeLaneMask, descriptorSets);
+ spirvShader->emit(&routine, activeLaneMask, activeLaneMask, descriptorSets);
spirvShader->emitEpilog(&routine);
}
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 4abb0f8..8e6b8e7 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -57,7 +57,7 @@
VK_FALSE, // occlusionQueryPrecise
VK_FALSE, // pipelineStatisticsQuery
VK_FALSE, // vertexPipelineStoresAndAtomics
- VK_FALSE, // fragmentStoresAndAtomics
+ VK_TRUE, // fragmentStoresAndAtomics
VK_FALSE, // shaderTessellationAndGeometryPointSize
VK_FALSE, // shaderImageGatherExtended
VK_FALSE, // shaderStorageImageExtendedFormats