SpirvShader: Implement GroupNonUniformBallot capability
Implements the following opcodes:
• OpGroupNonUniformBroadcast
• OpGroupNonUniformBroadcastFirst
• OpGroupNonUniformBallot
• OpGroupNonUniformInverseBallot
• OpGroupNonUniformBallotBitExtract
• OpGroupNonUniformBallotBitCount
• OpGroupNonUniformBallotFindLSB
• OpGroupNonUniformBallotFindMSB
Also stop yielding on control barriers whose execution scope is Subgroup: the yield is unnecessary there, and yielding does not currently work for graphics shaders.
Bug: b/133510501
Tests: dEQP-VK.subgroups.*
Change-Id: I39470bfa9f2184344d1c22e36975db0e23e48cc9
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35033
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index 3f4d442..01608fe 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -97,6 +97,51 @@
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
});
+ setInputBuiltin(routine, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
+ {
+ ASSERT(builtin.SizeInComponents == 4);
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ });
+
+ setInputBuiltin(routine, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
+ {
+ ASSERT(builtin.SizeInComponents == 4);
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ });
+
+ setInputBuiltin(routine, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
+ {
+ ASSERT(builtin.SizeInComponents == 4);
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ });
+
+ setInputBuiltin(routine, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
+ {
+ ASSERT(builtin.SizeInComponents == 4);
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ });
+
+ setInputBuiltin(routine, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
+ {
+ ASSERT(builtin.SizeInComponents == 4);
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ });
+
setInputBuiltin(routine, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
{
ASSERT(builtin.SizeInComponents == 1);
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index f245ea8..8780c39 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -59,6 +59,56 @@
routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
}
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupEqMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupGeMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupGtMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupLeMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupLtMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
it = spirvShader->inputBuiltins.find(spv::BuiltInDeviceIndex);
if (it != spirvShader->inputBuiltins.end())
{
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index adfd1c3..c15cd79 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -62,6 +62,21 @@
return std::make_pair(whole, frac);
}
+ // Returns the number of 1s in bits, per lane.
+ sw::SIMD::UInt CountBits(rr::RValue<sw::SIMD::UInt> const &bits)
+ {
+ // TODO: Add an intrinsic to reactor. Even if there isn't a
+ // single vector instruction, there may be target-dependent
+ // ways to make this faster.
+ // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ sw::SIMD::UInt c = bits - ((bits >> 1) & sw::SIMD::UInt(0x55555555));
+ c = ((c >> 2) & sw::SIMD::UInt(0x33333333)) + (c & sw::SIMD::UInt(0x33333333));
+ c = ((c >> 4) + c) & sw::SIMD::UInt(0x0F0F0F0F);
+ c = ((c >> 8) + c) & sw::SIMD::UInt(0x00FF00FF);
+ c = ((c >> 16) + c) & sw::SIMD::UInt(0x0000FFFF);
+ return c;
+ }
+
// Returns 1 << bits.
// If the resulting bit overflows a 32 bit integer, 0 is returned.
rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits)
@@ -812,6 +827,7 @@
case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
case spv::CapabilityMultiView: capabilities.MultiView = true; break;
case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
+ case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
default:
UNSUPPORTED("Unsupported capability %u", insn.word(1));
}
@@ -1053,6 +1069,14 @@
case spv::OpImageRead:
case spv::OpImageTexelPointer:
case spv::OpGroupNonUniformElect:
+ case spv::OpGroupNonUniformBroadcast:
+ case spv::OpGroupNonUniformBroadcastFirst:
+ case spv::OpGroupNonUniformBallot:
+ case spv::OpGroupNonUniformInverseBallot:
+ case spv::OpGroupNonUniformBallotBitExtract:
+ case spv::OpGroupNonUniformBallotBitCount:
+ case spv::OpGroupNonUniformBallotFindLSB:
+ case spv::OpGroupNonUniformBallotFindMSB:
case spv::OpCopyObject:
case spv::OpArrayLength:
// Instructions that yield an intermediate value or divergent pointer
@@ -2699,6 +2723,14 @@
return EmitMemoryBarrier(insn, state);
case spv::OpGroupNonUniformElect:
+ case spv::OpGroupNonUniformBroadcast:
+ case spv::OpGroupNonUniformBroadcastFirst:
+ case spv::OpGroupNonUniformBallot:
+ case spv::OpGroupNonUniformInverseBallot:
+ case spv::OpGroupNonUniformBallotBitExtract:
+ case spv::OpGroupNonUniformBallotBitCount:
+ case spv::OpGroupNonUniformBallotFindLSB:
+ case spv::OpGroupNonUniformBallotFindMSB:
return EmitGroupNonUniform(insn, state);
case spv::OpArrayLength:
@@ -3289,20 +3321,8 @@
break;
}
case spv::OpBitCount:
- {
- // TODO: Add an intrinsic to reactor. Even if there isn't a
- // single vector instruction, there may be target-dependent
- // ways to make this faster.
- // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- auto v = src.UInt(i);
- SIMD::UInt c = v - ((v >> 1) & SIMD::UInt(0x55555555));
- c = ((c >> 2) & SIMD::UInt(0x33333333)) + (c & SIMD::UInt(0x33333333));
- c = ((c >> 4) + c) & SIMD::UInt(0x0F0F0F0F);
- c = ((c >> 8) + c) & SIMD::UInt(0x00FF00FF);
- c = ((c >> 16) + c) & SIMD::UInt(0x0000FFFF);
- dst.move(i, c);
+ dst.move(i, CountBits(src.UInt(i)));
break;
- }
case spv::OpSNegate:
dst.move(i, -src.Int(i));
break;
@@ -5906,9 +5926,10 @@
switch (executionScope)
{
case spv::ScopeWorkgroup:
- case spv::ScopeSubgroup:
Yield(YieldResult::ControlBarrier);
break;
+ case spv::ScopeSubgroup:
+ break;
default:
// See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
@@ -5938,6 +5959,8 @@
SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
{
+ static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4");
+
auto &type = getType(Type::ID(insn.word(1)));
Object::ID resultId = insn.word(2);
auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
@@ -5959,6 +5982,127 @@
dst.move(0, elect);
break;
}
+
+ case spv::OpGroupNonUniformBroadcast:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
+ GenericValue value(this, state, valueId);
+ auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
+ for (auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto oneVal = SIMD::Int(value.Int(i) & mask);
+ auto replVal = SIMD::Int(oneVal.xxzz | oneVal.yyww);
+ dst.move(i, replVal.xxyy | replVal.zzww);
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformBroadcastFirst:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ GenericValue value(this, state, valueId);
+ // Build a mask selecting only the active invocation with the lowest
+ // id in the group; its value is then replicated to every lane.
+ SIMD::Int active = state->activeLaneMask();
+ // TODO: Would be nice if we could write this as:
+ // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
+ auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
+ for (auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto oneVal = SIMD::Int(value.Int(i) & elect);
+ auto replVal = SIMD::Int(oneVal.xxzz | oneVal.yyww);
+ dst.move(i, replVal.xxyy | replVal.zzww);
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallot:
+ {
+ ASSERT(type.sizeInComponents == 4);
+ GenericValue predicate(this, state, insn.word(4));
+ dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
+ dst.move(1, SIMD::Int(0));
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(0));
+ break;
+ }
+
+ case spv::OpGroupNonUniformInverseBallot:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
+ dst.move(0, -bit);
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotBitExtract:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ auto indexId = Object::ID(insn.word(5));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
+ GenericValue value(this, state, valueId);
+ GenericValue index(this, state, indexId);
+ auto vecIdx = index.Int(0) / SIMD::Int(32);
+ auto bitIdx = index.Int(0) & SIMD::Int(31);
+ auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
+ (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
+ (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
+ (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
+ dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotBitCount:
+ {
+ auto operation = spv::GroupOperation(insn.word(4));
+ auto valueId = Object::ID(insn.word(5));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ switch (operation)
+ {
+ case spv::GroupOperationReduce:
+ dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
+ break;
+ case spv::GroupOperationInclusiveScan:
+ dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
+ break;
+ case spv::GroupOperationExclusiveScan:
+ dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
+ break;
+ default:
+ UNSUPPORTED("GroupOperation %d", int(operation));
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotFindLSB:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotFindMSB:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
+ break;
+ }
+
default:
UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 44e252e..c3071bc 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -737,6 +737,7 @@
bool GroupNonUniform : 1;
bool MultiView : 1;
bool DeviceGroup : 1;
+ bool GroupNonUniformBallot : 1;
};
Capabilities const &getUsedCapabilities() const
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index e240e7f..cef5e8b 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -60,6 +60,56 @@
routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
}
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupEqMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupGeMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupGtMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupLeMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
+ it = spirvShader->inputBuiltins.find(spv::BuiltInSubgroupLtMask);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ ASSERT(it->second.SizeInComponents == 4);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
+ }
+
it = spirvShader->inputBuiltins.find(spv::BuiltInDeviceIndex);
if (it != spirvShader->inputBuiltins.end())
{
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index d721c94..f38ff36 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -313,7 +313,7 @@
{
properties->subgroupSize = sw::SIMD::Width;
properties->supportedStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT;
- properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
+ properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
properties->quadOperationsInAllStages = VK_FALSE;
}