|  | // Copyright 2019 The SwiftShader Authors. All Rights Reserved. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | #include "SpirvShader.hpp" | 
|  |  | 
|  | #include <spirv/unified1/spirv.hpp> | 
|  |  | 
|  | namespace sw { | 
|  |  | 
|  | // Template function to perform a binary group operation. | 
|  | // |TYPE| should be the type of the binary operation (as a SIMD::<ScalarType>). | 
|  | // |I| should be a type suitable to initialize the identity value. | 
|  | // |APPLY| should be a callable object that takes two RValue<TYPE> parameters | 
|  | // and returns a new RValue<TYPE> corresponding to the operation's result. | 
|  | template<typename TYPE, typename I, typename APPLY> | 
|  | static RValue<TYPE> BinaryOperation( | 
|  | spv::GroupOperation operation, | 
|  | RValue<SIMD::UInt> value, | 
|  | RValue<SIMD::UInt> mask, | 
|  | const I identityValue, | 
|  | APPLY &&apply) | 
|  | { | 
|  | auto identity = TYPE(identityValue); | 
|  | SIMD::UInt v_uint = (value & mask) | (As<SIMD::UInt>(identity) & ~mask); | 
|  | TYPE v = As<TYPE>(v_uint); | 
|  |  | 
|  | switch(operation) | 
|  | { | 
|  | case spv::GroupOperationReduce: | 
|  | { | 
|  | // NOTE: floating-point add and multiply are not really commutative so | 
|  | //       ensure that all values in the final lanes are identical | 
|  | TYPE v2 = apply(v.xxzz, v.yyww);  // [xy]   [xy]   [zw]   [zw] | 
|  | return apply(v2.xxxx, v2.zzzz);   // [xyzw] [xyzw] [xyzw] [xyzw] | 
|  | } | 
|  | break; | 
|  | case spv::GroupOperationInclusiveScan: | 
|  | { | 
|  | TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */);   // [x] [xy] [yz]  [zw] | 
|  | return apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw] | 
|  | } | 
|  | break; | 
|  | case spv::GroupOperationExclusiveScan: | 
|  | { | 
|  | TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */);      // [x] [xy] [yz]  [zw] | 
|  | TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw] | 
|  | return Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */);               // [i] [x]  [xy]  [xyz] | 
|  | } | 
|  | break; | 
|  | default: | 
|  | UNSUPPORTED("Group operation: %d", operation); | 
|  | return identity; | 
|  | } | 
|  | } | 
|  |  | 
// Emits code for the SPIR-V OpGroupNonUniform* instructions.
//
// Each subgroup invocation maps to one SIMD lane, so the subgroup size is
// SIMD::Width (hard-coded to 4 here) and all group operations reduce to lane
// swizzles, lane masks, and 4-wide horizontal reductions.
//
// insn layout (common prefix): word(1) = result type id, word(2) = result id,
// word(3) = execution scope (must be Subgroup); remaining words vary per opcode.
void SpirvEmitter::EmitGroupNonUniform(InsnIterator insn)
{
	ASSERT(SIMD::Width == 4);  // EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4

	auto &type = shader.getType(Type::ID(insn.word(1)));
	Object::ID resultId = insn.word(2);
	auto scope = spv::Scope(shader.GetConstScalarInt(insn.word(3)));
	ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");

	auto &dst = createIntermediate(resultId, type.componentCount);

	switch(insn.opcode())
	{
	case spv::OpGroupNonUniformElect:
		{
			// Result is true only in the active invocation with the lowest id
			// in the group, otherwise result is false.
			SIMD::Int active = activeLaneMask();  // Considers helper invocations active. See b/151137030
			// TODO: Would be nice if we could write this as:
			//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
			// A lane is elected iff it is active and no lower-indexed lane is
			// active; v0111 zeroes lane x so lane x only needs its own bit.
			auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
			auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
			dst.move(0, elect);
		}
		break;

	case spv::OpGroupNonUniformAll:
		{
			// True iff the predicate holds in every active lane. Inactive
			// lanes are forced to true by OR-ing with ~activeLaneMask().
			Operand predicate(shader, *this, insn.word(4));
			dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(activeLaneMask())));  // Considers helper invocations active. See b/151137030
		}
		break;

	case spv::OpGroupNonUniformAny:
		{
			// True iff the predicate holds in at least one active lane.
			// Inactive lanes are forced to false by AND-ing with the mask.
			Operand predicate(shader, *this, insn.word(4));
			dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(activeLaneMask())));  // Considers helper invocations active. See b/151137030
		}
		break;

	case spv::OpGroupNonUniformAllEqual:
		{
			// True iff every active lane holds the same value, for every
			// component of the operand.
			Operand value(shader, *this, insn.word(4));
			auto res = SIMD::UInt(0xffffffff);
			SIMD::UInt active = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
			SIMD::UInt inactive = ~active;
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::UInt v = value.UInt(i) & active;
				SIMD::UInt filled = v;
				// Rotate live values into inactive lanes so the equality test
				// below only ever compares live data against live data.
				for(int j = 0; j < SIMD::Width - 1; j++)
				{
					filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
				}
				// All lanes equal iff each lane equals its rotated neighbor.
				res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
			}
			dst.move(0, res);
		}
		break;

	case spv::OpGroupNonUniformBroadcast:
		{
			// Copies the value from the lane selected by Id (word 5) into all
			// lanes of the result.
			auto valueId = Object::ID(insn.word(4));
			auto idId = Object::ID(insn.word(5));
			Operand value(shader, *this, valueId);

			// Decide between the fast path for constants and the slow path for
			// intermediates.
			if(shader.getObject(idId).kind == Object::Kind::Constant)
			{
				// Fast path: the lane index is known at shader-compile time,
				// so select it with a precomputed per-lane equality mask.
				auto id = SIMD::Int(shader.GetConstScalarInt(insn.word(5)));
				auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
				for(auto i = 0u; i < type.componentCount; i++)
				{
					dst.move(i, OrAll(value.Int(i) & mask));
				}
			}
			else
			{
				Operand id(shader, *this, idId);

				SIMD::UInt active = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
				SIMD::UInt inactive = ~active;
				SIMD::UInt filled = id.UInt(0) & active;

				// The id operand must be dynamically uniform; propagate a live
				// lane's id into inactive lanes so every lane agrees on it.
				for(int j = 0; j < SIMD::Width - 1; j++)
				{
					filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
				}

				auto mask = CmpEQ(filled, SIMD::UInt(0, 1, 2, 3));

				for(uint32_t i = 0u; i < type.componentCount; i++)
				{
					dst.move(i, OrAll(value.UInt(i) & mask));
				}
			}
		}
		break;

	case spv::OpGroupNonUniformBroadcastFirst:
		{
			// Broadcasts the value held by the lowest-indexed active lane.
			auto valueId = Object::ID(insn.word(4));
			Operand value(shader, *this, valueId);
			// Result is true only in the active invocation with the lowest id
			// in the group, otherwise result is false.
			SIMD::Int active = activeLaneMask();  // Considers helper invocations active. See b/151137030
			// TODO: Would be nice if we could write this as:
			//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
			// Same election trick as OpGroupNonUniformElect above.
			auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
			auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				// Exactly one lane survives the elect mask; OrAll splats it.
				dst.move(i, OrAll(value.Int(i) & elect));
			}
		}
		break;

	case spv::OpGroupNonUniformQuadBroadcast:
		{
			// Broadcasts the value from the quad lane selected by Index. With
			// SIMD::Width == 4 the whole subgroup is a single quad.
			auto valueId = Object::ID(insn.word(4));
			Operand value(shader, *this, valueId);

			ASSERT(shader.getType(shader.getObject(insn.word(5))).componentCount == 1);
			auto indexId = Object::ID(insn.word(5));
			SIMD::Int index = Operand(shader, *this, indexId).Int(0);

			SIMD::Int active = activeLaneMask();
			// Populate all lanes in index with the same value. Index is required to be
			// uniform per the SPIR-V spec, so all active lanes should be identical.
			index = OrAll(active & index);
			SIMD::Int mask = CmpEQ(index, SIMD::Int(0, 1, 2, 3));

			for(auto i = 0u; i < type.componentCount; i++)
			{
				dst.move(i, OrAll(value.Int(i) & mask));
			}
		}
		break;

	case spv::OpGroupNonUniformQuadSwap:
		{
			auto valueId = Object::ID(insn.word(4));
			// SPIR-V spec: Direction must be a scalar of integer type and come from a constant instruction
			int direction = shader.GetConstScalarInt(insn.word(5));

			// Swap values between quad lanes; each direction is a fixed lane
			// permutation of the 2x2 quad (lanes laid out x y / z w).
			Operand value(shader, *this, valueId);
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				switch(direction)
				{
				case 0:  // Horizontal
					dst.move(i, v.yxwz);
					break;
				case 1:  // Vertical
					dst.move(i, v.zwxy);
					break;
				case 2:  // Diagonal
					dst.move(i, v.wzyx);
					break;
				default:
					// The SPIR-V spec doesn't define what happens in this case,
					// so the result is undefined.
					UNSUPPORTED("SPIR-V does not define a OpGroupNonUniformQuadSwap result for a direction of %d", direction);
					break;
				}
			}
		}
		break;

	case spv::OpGroupNonUniformBallot:
		{
			// Packs the per-lane predicate into a bitmask in component 0 of
			// the uvec4 result (SignMask extracts one bit per lane); the
			// remaining components are zero since only 4 lanes exist.
			ASSERT(type.componentCount == 4);
			Operand predicate(shader, *this, insn.word(4));
			dst.move(0, SIMD::Int(SignMask(activeLaneMask() & predicate.Int(0))));  // Considers helper invocations active. See b/151137030
			dst.move(1, SIMD::Int(0));
			dst.move(2, SIMD::Int(0));
			dst.move(3, SIMD::Int(0));
		}
		break;

	case spv::OpGroupNonUniformInverseBallot:
		{
			// Each lane reads its own bit from the ballot bitmask.
			auto valueId = Object::ID(insn.word(4));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
			auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
			// Negating 0/1 yields 0/0xFFFFFFFF, the boolean representation.
			dst.move(0, -bit);
		}
		break;

	case spv::OpGroupNonUniformBallotBitExtract:
		{
			// Each lane extracts the ballot bit selected by its own (possibly
			// non-uniform) index. vecIdx picks the uvec4 component, bitIdx the
			// bit within that 32-bit word.
			auto valueId = Object::ID(insn.word(4));
			auto indexId = Object::ID(insn.word(5));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			ASSERT(shader.getObjectType(indexId).componentCount == 1);
			Operand value(shader, *this, valueId);
			Operand index(shader, *this, indexId);
			auto vecIdx = index.Int(0) / SIMD::Int(32);
			auto bitIdx = index.Int(0) & SIMD::Int(31);
			// Select the addressed component per lane via equality masks.
			auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
			            (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
			            (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
			            (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
			// Negate 0/1 to produce the 0/0xFFFFFFFF boolean representation.
			dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
		}
		break;

	case spv::OpGroupNonUniformBallotBitCount:
		{
			// Counts set ballot bits. Only the low 4 bits are meaningful with
			// a 4-lane subgroup; the per-lane AND masks implement the
			// reduce/inclusive/exclusive prefix ranges.
			auto operation = spv::GroupOperation(insn.word(4));
			auto valueId = Object::ID(insn.word(5));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
			switch(operation)
			{
			case spv::GroupOperationReduce:
				dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
				break;
			case spv::GroupOperationInclusiveScan:
				// Lane i counts bits 0..i (masks 0b0001, 0b0011, 0b0111, 0b1111).
				dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
				break;
			case spv::GroupOperationExclusiveScan:
				// Lane i counts bits 0..i-1 (masks 0b0000, 0b0001, 0b0011, 0b0111).
				dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
				break;
			default:
				UNSUPPORTED("GroupOperation %d", int(operation));
			}
		}
		break;

	case spv::OpGroupNonUniformBallotFindLSB:
		{
			// Index of the lowest set ballot bit (count-trailing-zeros),
			// restricted to the 4 valid lane bits.
			auto valueId = Object::ID(insn.word(4));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
			dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
		}
		break;

	case spv::OpGroupNonUniformBallotFindMSB:
		{
			// Index of the highest set ballot bit: 31 minus the leading-zero
			// count, restricted to the 4 valid lane bits.
			auto valueId = Object::ID(insn.word(4));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
			dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
		}
		break;

	case spv::OpGroupNonUniformShuffle:
		{
			// Each lane reads the value of the lane selected by its own id
			// operand, implemented as a 4-way masked select.
			Operand value(shader, *this, insn.word(4));
			Operand id(shader, *this, insn.word(5));
			auto x = CmpEQ(SIMD::Int(0), id.Int(0));
			auto y = CmpEQ(SIMD::Int(1), id.Int(0));
			auto z = CmpEQ(SIMD::Int(2), id.Int(0));
			auto w = CmpEQ(SIMD::Int(3), id.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
			}
		}
		break;

	case spv::OpGroupNonUniformShuffleXor:
		{
			// Each lane reads from lane (laneIndex ^ mask); the equality masks
			// below select the source lane per destination lane.
			Operand value(shader, *this, insn.word(4));
			Operand mask(shader, *this, insn.word(5));
			auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
			}
		}
		break;

	case spv::OpGroupNonUniformShuffleUp:
		{
			// Each lane reads from lane (laneIndex - delta); one precomputed
			// swizzle per possible delta value (0..3), selected by mask.
			Operand value(shader, *this, insn.word(4));
			Operand delta(shader, *this, insn.word(5));
			auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
			auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
			auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
			auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
			}
		}
		break;

	case spv::OpGroupNonUniformShuffleDown:
		{
			// Each lane reads from lane (laneIndex + delta); one precomputed
			// swizzle per possible delta value (0..3), selected by mask.
			Operand value(shader, *this, insn.word(4));
			Operand delta(shader, *this, insn.word(5));
			auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
			auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
			auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
			auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
			}
		}
		break;

	// The remaining instructions are GroupNonUniformArithmetic operations
	default:
		// NOTE(review): this redeclaration shadows the outer 'type' with the
		// same value — the outer declaration could be reused; confirm.
		auto &type = shader.getType(Type::ID(insn.word(1)));
		auto operation = static_cast<spv::GroupOperation>(insn.word(4));
		Operand value(shader, *this, insn.word(5));
		auto mask = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030

		// Each opcode supplies its identity value and combining lambda to
		// BinaryOperation, which handles Reduce/InclusiveScan/ExclusiveScan.
		for(uint32_t i = 0; i < type.componentCount; i++)
		{
			switch(insn.opcode())
			{
			case spv::OpGroupNonUniformIAdd:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return a + b; }));
				break;
			case spv::OpGroupNonUniformFAdd:
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, 0.0f,
				                [](auto a, auto b) { return a + b; }));
				break;

			case spv::OpGroupNonUniformIMul:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, 1,
				                [](auto a, auto b) { return a * b; }));
				break;

			case spv::OpGroupNonUniformFMul:
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, 1.0f,
				                [](auto a, auto b) { return a * b; }));
				break;

			case spv::OpGroupNonUniformBitwiseAnd:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, ~0u,
				                [](auto a, auto b) { return a & b; }));
				break;

			case spv::OpGroupNonUniformBitwiseOr:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return a | b; }));
				break;

			case spv::OpGroupNonUniformBitwiseXor:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return a ^ b; }));
				break;

			case spv::OpGroupNonUniformSMin:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, INT32_MAX,
				                [](auto a, auto b) { return Min(a, b); }));
				break;

			case spv::OpGroupNonUniformUMin:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, ~0u,
				                [](auto a, auto b) { return Min(a, b); }));
				break;

			case spv::OpGroupNonUniformFMin:
				// NMin: NaN-aware minimum (identity is +infinity).
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, SIMD::Float::infinity(),
				                [](auto a, auto b) { return NMin(a, b); }));
				break;

			case spv::OpGroupNonUniformSMax:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, INT32_MIN,
				                [](auto a, auto b) { return Max(a, b); }));
				break;

			case spv::OpGroupNonUniformUMax:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return Max(a, b); }));
				break;

			case spv::OpGroupNonUniformFMax:
				// NMax: NaN-aware maximum (identity is -infinity).
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, -SIMD::Float::infinity(),
				                [](auto a, auto b) { return NMax(a, b); }));
				break;

			case spv::OpGroupNonUniformLogicalAnd:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, ~0u,
				                [](auto a, auto b) {
					                SIMD::UInt zero = SIMD::UInt(0);
					                return CmpNEQ(a, zero) & CmpNEQ(b, zero);
				                }));
				break;

			case spv::OpGroupNonUniformLogicalOr:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) {
					                SIMD::UInt zero = SIMD::UInt(0);
					                return CmpNEQ(a, zero) | CmpNEQ(b, zero);
				                }));
				break;

			case spv::OpGroupNonUniformLogicalXor:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) {
					                SIMD::UInt zero = SIMD::UInt(0);
					                return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
				                }));
				break;

			default:
				// NOTE(review): reports the result type's declaring opcode;
				// insn.opcode() may have been intended here — confirm.
				UNSUPPORTED("EmitGroupNonUniform op: %s", shader.OpcodeName(type.opcode()));
			}
		}
		break;
	}
}
|  |  | 
|  | }  // namespace sw |