Refactor GroupNonUniformArithmetic handling The implementation of GroupNonUniformArithmetic instructions used to rely on a pimpl struct containing a static template function. It's not very elegant and causes code bloat. It's also too closely tied to the SpirvShader class, which we want to refactor into a parsing-only class. This change reduces the template function to performing the operation on scalarized components. It doesn't need access to SpirvShader or EmitState and takes one fewer argument. Bug: b/247020580 Change-Id: I24955b42d84a3a31f139bc0a3aacc09e82f58fee Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/68868 Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp index 2e57c5d..1e775f7 100644 --- a/src/Pipeline/SpirvShader.hpp +++ b/src/Pipeline/SpirvShader.hpp
@@ -1589,7 +1589,6 @@ struct Impl { struct Debugger; - struct Group; Debugger *debugger = nullptr; }; Impl impl;
diff --git a/src/Pipeline/SpirvShaderGroup.cpp b/src/Pipeline/SpirvShaderGroup.cpp index 19c2c05..a081437 100644 --- a/src/Pipeline/SpirvShaderGroup.cpp +++ b/src/Pipeline/SpirvShaderGroup.cpp
@@ -18,63 +18,51 @@ namespace sw { -struct SpirvShader::Impl::Group +// Template function to perform a binary group operation. +// |TYPE| should be the type of the binary operation (as a SIMD::<ScalarType>). +// |I| should be a type suitable to initialize the identity value. +// |APPLY| should be a callable object that takes two RValue<TYPE> parameters +// and returns a new RValue<TYPE> corresponding to the operation's result. +template<typename TYPE, typename I, typename APPLY> +static RValue<TYPE> BinaryOperation( + spv::GroupOperation operation, + RValue<SIMD::UInt> value, + RValue<SIMD::UInt> mask, + const I identityValue, + APPLY &&apply) { - // Template function to perform a binary operation. - // |TYPE| should be the type of the binary operation (as a SIMD::<ScalarType>). - // |I| should be a type suitable to initialize the identity value. - // |APPLY| should be a callable object that takes two RValue<TYPE> parameters - // and returns a new RValue<TYPE> corresponding to the operation's result. - template<typename TYPE, typename I, typename APPLY> - static void BinaryOperation( - const SpirvShader *shader, - const SpirvShader::InsnIterator &insn, - const SpirvShader::EmitState *state, - Intermediate &dst, - const I identityValue, - APPLY &&apply) + auto identity = TYPE(identityValue); + SIMD::UInt v_uint = (value & mask) | (As<SIMD::UInt>(identity) & ~mask); + TYPE v = As<TYPE>(v_uint); + + switch(operation) { - SpirvShader::Operand value(shader, state, insn.word(5)); - auto &type = shader->getType(SpirvShader::Type::ID(insn.word(1))); - for(auto i = 0u; i < type.componentCount; i++) + case spv::GroupOperationReduce: { - auto mask = As<SIMD::UInt>(state->activeLaneMask()); // Considers helper invocations active. 
See b/151137030 - auto identity = TYPE(identityValue); - SIMD::UInt v_uint = (value.UInt(i) & mask) | (As<SIMD::UInt>(identity) & ~mask); - TYPE v = As<TYPE>(v_uint); - switch(spv::GroupOperation(insn.word(4))) - { - case spv::GroupOperationReduce: - { - // NOTE: floating-point add and multiply are not really commutative so - // ensure that all values in the final lanes are identical - TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw] - TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw] - dst.move(i, v3); - } - break; - case spv::GroupOperationInclusiveScan: - { - TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] - TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] - dst.move(i, v3); - } - break; - case spv::GroupOperationExclusiveScan: - { - TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] - TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] - auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz] - dst.move(i, v4); - } - break; - default: - UNSUPPORTED("EmitGroupNonUniform op: %s Group operation: %d", - SpirvShader::OpcodeName(type.opcode()), insn.word(4)); - } + // NOTE: floating-point add and multiply are not really commutative so + // ensure that all values in the final lanes are identical + TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw] + return apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw] } + break; + case spv::GroupOperationInclusiveScan: + { + TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] + return apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] + } + break; + case spv::GroupOperationExclusiveScan: + { + TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] 
[xy] [yz] [zw] + TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] + return Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz] + } + break; + default: + UNSUPPORTED("Group operation: %d", operation); + return identity; } -}; +} SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const { @@ -396,113 +384,126 @@ } break; - case spv::OpGroupNonUniformIAdd: - Impl::Group::BinaryOperation<SIMD::Int>( - this, insn, state, dst, 0, - [](auto a, auto b) { return a + b; }); - break; - - case spv::OpGroupNonUniformFAdd: - Impl::Group::BinaryOperation<SIMD::Float>( - this, insn, state, dst, 0.0f, - [](auto a, auto b) { return a + b; }); - break; - - case spv::OpGroupNonUniformIMul: - Impl::Group::BinaryOperation<SIMD::Int>( - this, insn, state, dst, 1, - [](auto a, auto b) { return a * b; }); - break; - - case spv::OpGroupNonUniformFMul: - Impl::Group::BinaryOperation<SIMD::Float>( - this, insn, state, dst, 1.0f, - [](auto a, auto b) { return a * b; }); - break; - - case spv::OpGroupNonUniformBitwiseAnd: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, ~0u, - [](auto a, auto b) { return a & b; }); - break; - - case spv::OpGroupNonUniformBitwiseOr: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, 0, - [](auto a, auto b) { return a | b; }); - break; - - case spv::OpGroupNonUniformBitwiseXor: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, 0, - [](auto a, auto b) { return a ^ b; }); - break; - - case spv::OpGroupNonUniformSMin: - Impl::Group::BinaryOperation<SIMD::Int>( - this, insn, state, dst, INT32_MAX, - [](auto a, auto b) { return Min(a, b); }); - break; - - case spv::OpGroupNonUniformUMin: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, ~0u, - [](auto a, auto b) { return Min(a, b); }); - break; - - case spv::OpGroupNonUniformFMin: - 
Impl::Group::BinaryOperation<SIMD::Float>( - this, insn, state, dst, SIMD::Float::infinity(), - [](auto a, auto b) { return NMin(a, b); }); - break; - - case spv::OpGroupNonUniformSMax: - Impl::Group::BinaryOperation<SIMD::Int>( - this, insn, state, dst, INT32_MIN, - [](auto a, auto b) { return Max(a, b); }); - break; - - case spv::OpGroupNonUniformUMax: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, 0, - [](auto a, auto b) { return Max(a, b); }); - break; - - case spv::OpGroupNonUniformFMax: - Impl::Group::BinaryOperation<SIMD::Float>( - this, insn, state, dst, -SIMD::Float::infinity(), - [](auto a, auto b) { return NMax(a, b); }); - break; - - case spv::OpGroupNonUniformLogicalAnd: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, ~0u, - [](auto a, auto b) { - SIMD::UInt zero = SIMD::UInt(0); - return CmpNEQ(a, zero) & CmpNEQ(b, zero); - }); - break; - - case spv::OpGroupNonUniformLogicalOr: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, 0, - [](auto a, auto b) { - SIMD::UInt zero = SIMD::UInt(0); - return CmpNEQ(a, zero) | CmpNEQ(b, zero); - }); - break; - - case spv::OpGroupNonUniformLogicalXor: - Impl::Group::BinaryOperation<SIMD::UInt>( - this, insn, state, dst, 0, - [](auto a, auto b) { - SIMD::UInt zero = SIMD::UInt(0); - return CmpNEQ(a, zero) ^ CmpNEQ(b, zero); - }); - break; - + // The remaining instructions are GroupNonUniformArithmetic operations default: - UNSUPPORTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode())); + auto &type = getType(SpirvShader::Type::ID(insn.word(1))); + auto operation = static_cast<spv::GroupOperation>(insn.word(4)); + SpirvShader::Operand value(this, state, insn.word(5)); + auto mask = As<SIMD::UInt>(state->activeLaneMask()); // Considers helper invocations active. 
See b/151137030 + + for(uint32_t i = 0; i < type.componentCount; i++) + { + switch(insn.opcode()) + { + case spv::OpGroupNonUniformIAdd: + dst.move(i, BinaryOperation<SIMD::Int>( + operation, value.UInt(i), mask, 0, + [](auto a, auto b) { return a + b; })); + break; + case spv::OpGroupNonUniformFAdd: + dst.move(i, BinaryOperation<SIMD::Float>( + operation, value.UInt(i), mask, 0.0f, + [](auto a, auto b) { return a + b; })); + break; + + case spv::OpGroupNonUniformIMul: + dst.move(i, BinaryOperation<SIMD::Int>( + operation, value.UInt(i), mask, 1, + [](auto a, auto b) { return a * b; })); + break; + + case spv::OpGroupNonUniformFMul: + dst.move(i, BinaryOperation<SIMD::Float>( + operation, value.UInt(i), mask, 1.0f, + [](auto a, auto b) { return a * b; })); + break; + + case spv::OpGroupNonUniformBitwiseAnd: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, ~0u, + [](auto a, auto b) { return a & b; })); + break; + + case spv::OpGroupNonUniformBitwiseOr: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, 0, + [](auto a, auto b) { return a | b; })); + break; + + case spv::OpGroupNonUniformBitwiseXor: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, 0, + [](auto a, auto b) { return a ^ b; })); + break; + + case spv::OpGroupNonUniformSMin: + dst.move(i, BinaryOperation<SIMD::Int>( + operation, value.UInt(i), mask, INT32_MAX, + [](auto a, auto b) { return Min(a, b); })); + break; + + case spv::OpGroupNonUniformUMin: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, ~0u, + [](auto a, auto b) { return Min(a, b); })); + break; + + case spv::OpGroupNonUniformFMin: + dst.move(i, BinaryOperation<SIMD::Float>( + operation, value.UInt(i), mask, SIMD::Float::infinity(), + [](auto a, auto b) { return NMin(a, b); })); + break; + + case spv::OpGroupNonUniformSMax: + dst.move(i, BinaryOperation<SIMD::Int>( + operation, value.UInt(i), mask, INT32_MIN, + [](auto a, auto b) { 
return Max(a, b); })); + break; + + case spv::OpGroupNonUniformUMax: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, 0, + [](auto a, auto b) { return Max(a, b); })); + break; + + case spv::OpGroupNonUniformFMax: + dst.move(i, BinaryOperation<SIMD::Float>( + operation, value.UInt(i), mask, -SIMD::Float::infinity(), + [](auto a, auto b) { return NMax(a, b); })); + break; + + case spv::OpGroupNonUniformLogicalAnd: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, ~0u, + [](auto a, auto b) { + SIMD::UInt zero = SIMD::UInt(0); + return CmpNEQ(a, zero) & CmpNEQ(b, zero); + })); + break; + + case spv::OpGroupNonUniformLogicalOr: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, 0, + [](auto a, auto b) { + SIMD::UInt zero = SIMD::UInt(0); + return CmpNEQ(a, zero) | CmpNEQ(b, zero); + })); + break; + + case spv::OpGroupNonUniformLogicalXor: + dst.move(i, BinaryOperation<SIMD::UInt>( + operation, value.UInt(i), mask, 0, + [](auto a, auto b) { + SIMD::UInt zero = SIMD::UInt(0); + return CmpNEQ(a, zero) ^ CmpNEQ(b, zero); + })); + break; + + default: + UNSUPPORTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode())); + } + } + break; } return EmitResult::Continue; }