[vulkan] Add VK_SUBGROUP_FEATURE_ARITHMETIC_BIT support
As the name suggests, this CL adds many operations related
to arithmetic subgroup operations, i.e.:
- Reduction / Inclusive Scan / Exclusive Scans for:
- IntAdd, UIntAdd, FloatAdd
- IntMul, UIntMul, FloatMul,
- IntMin, UIntMin, FloatMin,
- IntMax, UIntMax, FloatMax,
- BitwiseAnd, BitwiseOr, BitwiseXor
- LogicalAnd, LogicalOr, LogicalXor
The implementation uses a single template function to
implement all these, based on the fact that these are
all binary commutative operations.
NOTE: Only 32-bit values are supported.
To make scans efficient, a new Reactor operation, named
Blend() is introduced. It is used to mix two input vectors
using 4 3-bit indices (encoded in a single 16-bit value)
to select the result's lane values.
A new unit-test is added to ReactorUnittests to check its
behaviour. Unfortunately, the test takes about 2 minutes
on a fast workstation when doing a full scan, so it will
by default only check 1/11th of all possible values
(see comments in the patch for more details).
Also, Float4::positive_inf() and Float4::negative_inf()
methods were added, since trying to build Float4(INFINITY)
will trigger a DCHECK() in the Float4(float) constructor,
and the infinity values are required by the subgroup
floating-point scan operations.
Bug: b/142002682
Test: dEQP-VK.subgroups.arithmetic.*
Change-Id: I86f509fc47f7475ca126615ed698ee493ae835ef
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38929
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: David Turner <digit@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index bb118b0..7b817df 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -363,6 +363,7 @@
case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
+ case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break;
case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
@@ -627,6 +628,22 @@
case spv::OpGroupNonUniformShuffleXor:
case spv::OpGroupNonUniformShuffleUp:
case spv::OpGroupNonUniformShuffleDown:
+ case spv::OpGroupNonUniformIAdd:
+ case spv::OpGroupNonUniformFAdd:
+ case spv::OpGroupNonUniformIMul:
+ case spv::OpGroupNonUniformFMul:
+ case spv::OpGroupNonUniformSMin:
+ case spv::OpGroupNonUniformUMin:
+ case spv::OpGroupNonUniformFMin:
+ case spv::OpGroupNonUniformSMax:
+ case spv::OpGroupNonUniformUMax:
+ case spv::OpGroupNonUniformFMax:
+ case spv::OpGroupNonUniformBitwiseAnd:
+ case spv::OpGroupNonUniformBitwiseOr:
+ case spv::OpGroupNonUniformBitwiseXor:
+ case spv::OpGroupNonUniformLogicalAnd:
+ case spv::OpGroupNonUniformLogicalOr:
+ case spv::OpGroupNonUniformLogicalXor:
case spv::OpCopyObject:
case spv::OpArrayLength:
// Instructions that yield an intermediate value or divergent pointer
@@ -1865,6 +1882,22 @@
case spv::OpGroupNonUniformShuffleXor:
case spv::OpGroupNonUniformShuffleUp:
case spv::OpGroupNonUniformShuffleDown:
+ case spv::OpGroupNonUniformIAdd:
+ case spv::OpGroupNonUniformFAdd:
+ case spv::OpGroupNonUniformIMul:
+ case spv::OpGroupNonUniformFMul:
+ case spv::OpGroupNonUniformSMin:
+ case spv::OpGroupNonUniformUMin:
+ case spv::OpGroupNonUniformFMin:
+ case spv::OpGroupNonUniformSMax:
+ case spv::OpGroupNonUniformUMax:
+ case spv::OpGroupNonUniformFMax:
+ case spv::OpGroupNonUniformBitwiseAnd:
+ case spv::OpGroupNonUniformBitwiseOr:
+ case spv::OpGroupNonUniformBitwiseXor:
+ case spv::OpGroupNonUniformLogicalAnd:
+ case spv::OpGroupNonUniformLogicalOr:
+ case spv::OpGroupNonUniformLogicalXor:
return EmitGroupNonUniform(insn, state);
case spv::OpArrayLength:
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index ad0f740..899ff7e 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -499,6 +499,7 @@
bool GroupNonUniformBallot : 1;
bool GroupNonUniformShuffle : 1;
bool GroupNonUniformShuffleRelative : 1;
+ bool GroupNonUniformArithmetic : 1;
bool DeviceGroup : 1;
bool MultiView : 1;
};
@@ -1088,6 +1089,8 @@
// Returns 0 when invalid.
static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
+
+ struct GroupOps;
};
class SpirvRoutine
diff --git a/src/Pipeline/SpirvShaderGroup.cpp b/src/Pipeline/SpirvShaderGroup.cpp
index c47c666..ee6df3f 100644
--- a/src/Pipeline/SpirvShaderGroup.cpp
+++ b/src/Pipeline/SpirvShaderGroup.cpp
@@ -18,6 +18,64 @@
namespace sw {
+struct SpirvShader::GroupOps {
+
+ // Template function to perform a binary operation.
+ // |TYPE| should be the type of the identity value (as a SIMD::<Type>).
+ // |APPLY| should be a callable object that takes two RValue<TYPE> parameters
+ // and returns a new RValue<TYPE> corresponding to the operation's result.
+ template <typename TYPE, typename APPLY>
+ static void BinaryOperation(
+ const SpirvShader* shader,
+ const SpirvShader::InsnIterator& insn,
+ const SpirvShader::EmitState* state,
+ Intermediate& dst,
+ const TYPE& identity,
+ APPLY&& apply)
+ {
+ SpirvShader::GenericValue value(shader, state, insn.word(5));
+ auto &type = shader->getType(SpirvShader::Type::ID(insn.word(1)));
+ for (auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto mask = As<SIMD::UInt>(state->activeLaneMask());
+ SIMD::UInt v_uint = (value.UInt(i) & mask) | (As<SIMD::UInt>(identity) & ~mask);
+ TYPE v = As<TYPE>(v_uint);
+ switch (spv::GroupOperation(insn.word(4)))
+ {
+ case spv::GroupOperationReduce:
+ {
+ // NOTE: floating-point add and multiply are not associative, so
+ // ensure that all values in the final lanes are identical
+ TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw]
+ TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw]
+ dst.move(i, v3);
+ break;
+ }
+ case spv::GroupOperationInclusiveScan:
+ {
+ TYPE v2 = apply(v, Blend(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */); // [x] [xy] [yz] [zw]
+ TYPE v3 = apply(v2, Blend(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
+ dst.move(i, v3);
+ break;
+ }
+ case spv::GroupOperationExclusiveScan:
+ {
+ TYPE v2 = apply(v, Blend(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */); // [x] [xy] [yz] [zw]
+ TYPE v3 = apply(v2, Blend(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
+ auto v4 = Blend(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz]
+ dst.move(i, v4);
+ break;
+ }
+ default:
+ UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d",
+ SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4));
+ }
+ }
+ }
+
+};
+
+
SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
{
static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4");
@@ -258,10 +316,195 @@
break;
}
+ case spv::OpGroupNonUniformIAdd:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type>a, RValue<Type>b){ return a + b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformFAdd:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0.),
+ [](RValue<Type>a, RValue<Type>b){ return a + b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformIMul:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(1),
+ [](RValue<Type>a, RValue<Type>b){ return a * b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformFMul:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(1.),
+ [](RValue<Type>a, RValue<Type>b){ return a * b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformBitwiseAnd:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(~0u),
+ [](RValue<Type>a, RValue<Type>b){ return a & b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformBitwiseOr:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type>a, RValue<Type>b){ return a | b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformBitwiseXor:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type>a, RValue<Type>b){ return a ^ b; }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformSMin:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(INT32_MAX),
+ [](RValue<Type>a, RValue<Type>b){ return Min(a, b); }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformUMin:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(~0u),
+ [](RValue<Type>a, RValue<Type>b){ return Min(a, b); }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformFMin:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type::positive_inf(),
+ [](RValue<Type>a, RValue<Type>b){ return NMin(a, b); }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformSMax:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(INT32_MIN),
+ [](RValue<Type>a, RValue<Type>b){ return Max(a, b); }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformUMax:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type>a, RValue<Type>b){ return Max(a, b); }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformFMax:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type::negative_inf(),
+ [](RValue<Type>a, RValue<Type>b){ return NMax(a, b); }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformLogicalAnd:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(~0u),
+ [](RValue<Type>a, RValue<Type>b){
+ SIMD::UInt zero = SIMD::UInt(0);
+ return CmpNEQ(a, zero) & CmpNEQ(b, zero);
+ }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformLogicalOr:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type>a, RValue<Type>b){
+ SIMD::UInt zero = SIMD::UInt(0);
+ return CmpNEQ(a, zero) | CmpNEQ(b, zero);
+ }
+ );
+ break;
+ }
+
+ case spv::OpGroupNonUniformLogicalXor:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type>a, RValue<Type>b){
+ SIMD::UInt zero = SIMD::UInt(0);
+ return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
+ }
+ );
+ break;
+ }
+
default:
UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
}
return EmitResult::Continue;
}
-} // namespace sw
\ No newline at end of file
+} // namespace sw
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 3d57b17..6f6a8de 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -108,6 +108,35 @@
unmaterializedVariables.clear();
}
+ // NOTE: Only 12 bits out of 16 of the |select| value are used.
+ // More specifically, the value should look like:
+ //
+ // msb lsb
+ // v v
+ // [.aaa|.bbb|.ccc|.ddd] where '.' means an ignored bit
+ //
+ // This format makes it easy to write calls with hexadecimal select values,
+ // since each hex digit is a separate swizzle index. Note that the order
+ // of indices is reversed compared to createSwizzle4() below!
+ //
+ // For example:
+ // createBlend4( [a,b,c,d], [e,f,g,h], 0x0123 ) -> [a,b,c,d]
+ // createBlend4( [a,b,c,d], [e,f,g,h], 0x4567 ) -> [e,f,g,h]
+ // createBlend4( [a,b,c,d], [e,f,g,h], 0x4012 ) -> [e,a,b,c]
+ //
+ static Value *createBlend4(Value *lhs, Value *rhs, unsigned short select)
+ {
+ int swizzle[4] =
+ {
+ (select >> 12) & 0x07,
+ (select >> 8) & 0x07,
+ (select >> 4) & 0x07,
+ (select >> 0) & 0x07,
+ };
+
+ return Nucleus::createShuffleVector(lhs, rhs, swizzle);
+ }
+
static Value *createSwizzle4(Value *val, unsigned char select)
{
int swizzle[4] =
@@ -3481,6 +3510,11 @@
return RValue<Int4>(createSwizzle4(x.value, select));
}
+ RValue<Int4> Blend(RValue<Int4> x, RValue<Int4> y, unsigned short select)
+ {
+ return RValue<Int4>(createBlend4(x.value, y.value, select));
+ }
+
UInt4::UInt4() : XYZW(this)
{
}
@@ -3716,6 +3750,11 @@
return RValue<UInt4>(createSwizzle4(x.value, select));
}
+ RValue<UInt4> Blend(RValue<UInt4> x, RValue<UInt4> y, unsigned short select)
+ {
+ return RValue<UInt4>(createBlend4(x.value, y.value, select));
+ }
+
Half::Half(RValue<Float> cast)
{
UInt fp32i = As<UInt>(cast);
@@ -3805,7 +3844,7 @@
// being reinterpreted as float and then bitcast to integer again,
// which does not guarantee preserving the integer value.
//
- // Should inifinty and NaN constants be required, methods like
+ // Should infinity and NaN constants be required, methods like
// infinity(), quiet_NaN(), and signaling_NaN() should be added
// to the Float class.
ASSERT(std::isfinite(x));
@@ -4026,6 +4065,27 @@
constant(x, y, z, w);
}
+ Float4 Float4::positive_inf()
+ {
+ Float4 result;
+ result.infinity_constant(false);
+ return result;
+ }
+
+ Float4 Float4::negative_inf()
+ {
+ Float4 result;
+ result.infinity_constant(true);
+ return result;
+ }
+
+ void Float4::infinity_constant(bool negative)
+ {
+ double inf = negative ? -INFINITY : INFINITY;
+ double constantVector[4] = {inf, inf, inf, inf};
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
+ }
+
void Float4::constant(float x, float y, float z, float w)
{
// See Float(float) constructor for the rationale behind this assert.
@@ -4190,6 +4250,11 @@
return RValue<Float4>(createSwizzle4(x.value, select));
}
+ RValue<Float4> Blend(RValue<Float4> x, RValue<Float4> y, unsigned short select)
+ {
+ return RValue<Float4>(createBlend4(x.value, y.value, select));
+ }
+
RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
{
int shuffle[4] =
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 81757a9..5fdc73a 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1946,6 +1946,7 @@
RValue<Int4> Insert(RValue<Int4> val, RValue<Int> element, int i);
RValue<Int> SignMask(RValue<Int4> x);
RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select);
+ RValue<Int4> Blend(RValue<Int4> x, RValue<Int4> y, unsigned short select);
RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y);
class UInt4 : public LValue<UInt4>, public XYZW<UInt4>
@@ -2030,6 +2031,7 @@
RValue<UInt4> Insert(RValue<UInt4> val, RValue<UInt> element, int i);
// RValue<UInt4> RoundInt(RValue<Float4> cast);
RValue<UInt4> Swizzle(RValue<UInt4> x, unsigned char select);
+ RValue<UInt4> Blend(RValue<UInt4> x, RValue<UInt4> y, unsigned short select);
class Half : public LValue<Half>
{
@@ -2227,9 +2229,11 @@
RValue<Float4> operator=(const Swizzle4<Float4, T> &rhs);
static Type *getType();
-
+ static Float4 negative_inf();
+ static Float4 positive_inf();
private:
void constant(float x, float y, float z, float w);
+ void infinity_constant(bool negative);
};
RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs);
@@ -2254,6 +2258,7 @@
RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i);
RValue<Float> Extract(RValue<Float4> x, int i);
RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select);
+ RValue<Float4> Blend(RValue<Float4> x, RValue<Float4> y, unsigned short select);
RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm);
RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y);
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 06dcf20..fe25a01 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -470,6 +470,106 @@
}
+TEST(ReactorUnitTests, Blend)
+{
+ {
+ // |select| is [0aaa:0bbb:0ccc:0ddd] where |aaa|, |bbb|, |ccc|
+ // and |ddd| are 3-bit selection indices, for a total of (1 << 12)
+ // possibilities.
+ const int kSelectRange = 1 << 12;
+
+ // Unfortunately, testing the whole kSelectRange results in a test
+ // that is far too slow to run, because LLVM spends exponentially more
+ // time optimizing the function below as the number of test cases
+ // increases.
+ //
+ // To work-around the problem, only test a subset of the range by
+ // skipping every kRangeIncrement value.
+ //
+ // Set this value to 1 if you want to test the whole implementation,
+ // which will take a little less than 2 minutes on a fast workstation.
+ //
+ // The default value here takes about 1390ms, which is a little more than
+ // what the Swizzle test takes (993 ms) on my machine. A non-power-of-2
+ // value ensures a better spread over possible values.
+ const int kRangeIncrement = 11;
+
+ auto rangeIndexToSelect = [](int i) {
+ return static_cast<unsigned short>(
+ (((i >> 9) & 7) << 0) |
+ (((i >> 6) & 7) << 4) |
+ (((i >> 3) & 7) << 8) |
+ (((i >> 0) & 7) << 12)
+ );
+ };
+
+ FunctionT<int(void*)> function;
+ {
+ Pointer<Byte> out = function.Arg<0>();
+
+ for(int i = 0; i < kSelectRange; i += kRangeIncrement)
+ {
+ unsigned short select = rangeIndexToSelect(i);
+
+ *Pointer<Float4>(out + 16 * i) = Blend(Float4(1.0f, 2.0f, 3.0f, 4.0f),
+ Float4(5.0f, 6.0f, 7.0f, 8.0f),
+ select);
+
+ *Pointer<Int4>(out + (kSelectRange + i) * 16) = Blend(Int4(10, 11, 12, 13),
+ Int4(14, 15, 16, 17),
+ select);
+
+ *Pointer<UInt4>(out + (2 * kSelectRange + i) * 16) = Blend(UInt4(100, 101, 102, 103),
+ UInt4(104, 105, 106, 107),
+ select);
+ }
+
+ Return(0);
+ }
+
+ auto routine = function("one");
+
+ if(routine)
+ {
+ struct
+ {
+ float f[kSelectRange][4];
+ int i[kSelectRange][4];
+ unsigned u[kSelectRange][4];
+ } out;
+
+ memset(&out, 0, sizeof(out));
+
+ routine(&out);
+
+ for(int i = 0; i < kSelectRange; i += kRangeIncrement)
+ {
+ EXPECT_EQ(out.f[i][0], float(1.0f + (i & 7)));
+ EXPECT_EQ(out.f[i][1], float(1.0f + ((i >> 3) & 7)));
+ EXPECT_EQ(out.f[i][2], float(1.0f + ((i >> 6) & 7)));
+ EXPECT_EQ(out.f[i][3], float(1.0f + ((i >> 9) & 7)));
+ }
+
+ for(int i = 0; i < kSelectRange; i += kRangeIncrement)
+ {
+ EXPECT_EQ(out.i[i][0], int(10 + (i & 7)));
+ EXPECT_EQ(out.i[i][1], int(10 + ((i >> 3) & 7)));
+ EXPECT_EQ(out.i[i][2], int(10 + ((i >> 6) & 7)));
+ EXPECT_EQ(out.i[i][3], int(10 + ((i >> 9) & 7)));
+ }
+
+ for(int i = 0; i < kSelectRange; i += kRangeIncrement)
+ {
+ EXPECT_EQ(out.u[i][0], unsigned(100 + (i & 7)));
+ EXPECT_EQ(out.u[i][1], unsigned(100 + ((i >> 3) & 7)));
+ EXPECT_EQ(out.u[i][2], unsigned(100 + ((i >> 6) & 7)));
+ EXPECT_EQ(out.u[i][3], unsigned(100 + ((i >> 9) & 7)));
+ }
+ }
+ }
+
+}
+
TEST(ReactorUnitTests, Branching)
{
{
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index c86853f..08fa5c5 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -347,6 +347,7 @@
properties->supportedOperations =
VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
+ VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;