clang-format the src/Pipeline directory
Bug: b/144825072
Change-Id: I869aef91d6318bf6955581e5dad762800bd46296
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39655
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index 1d6d0a3..4ee8375 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -26,16 +26,21 @@
namespace {
-enum { X, Y, Z };
+enum
+{
+ X,
+ Y,
+ Z
+};
} // anonymous namespace
namespace sw {
ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
- : shader(shader),
- pipelineLayout(pipelineLayout),
- descriptorSets(descriptorSets)
+ : shader(shader)
+ , pipelineLayout(pipelineLayout)
+ , descriptorSets(descriptorSets)
{
}
@@ -53,46 +58,41 @@
shader->emitEpilog(&routine);
}
-void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3])
+void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3])
{
- routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
auto numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
- As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component)));
+ As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component)));
}
});
- routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
- As<SIMD::Float>(SIMD::Int(workgroupID[component]));
+ As<SIMD::Float>(SIMD::Int(workgroupID[component]));
}
});
- routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
auto workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
- As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component)));
+ As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component)));
}
});
- routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
auto subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupsPerWorkgroup));
});
- routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
auto invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(invocationsPerSubgroup));
@@ -101,7 +101,7 @@
routine->setImmutableInputBuiltins(shader);
}
-void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex)
+void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex)
{
Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
@@ -114,35 +114,31 @@
{
SIMD::Int idx = localInvocationIndex;
localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
- idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
+ idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX);
- idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo
+ idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo
localInvocationID[X] = idx;
}
- routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex);
});
- routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupIndex));
});
- routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
{
value[builtin.FirstComponent + component] =
- As<SIMD::Float>(localInvocationID[component]);
+ As<SIMD::Float>(localInvocationID[component]);
}
});
- routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
SIMD::Int wgID = 0;
wgID = Insert(wgID, workgroupID[X], X);
wgID = Insert(wgID, workgroupID[Y], Y);
@@ -156,7 +152,7 @@
});
}
-void ComputeProgram::emit(SpirvRoutine* routine)
+void ComputeProgram::emit(SpirvRoutine *routine)
{
Pointer<Byte> data = Arg<0>();
Int workgroupX = Arg<1>();
@@ -174,7 +170,7 @@
Int invocationsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerWorkgroup));
- Int workgroupID[3] = {workgroupX, workgroupY, workgroupZ};
+ Int workgroupID[3] = { workgroupX, workgroupY, workgroupZ };
setWorkgroupBuiltins(data, routine, workgroupID);
For(Int i = 0, i < subgroupCount, i++)
@@ -194,11 +190,11 @@
}
void ComputeProgram::run(
- vk::DescriptorSet::Bindings const &descriptorSets,
- vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
- PushConstantStorage const &pushConstants,
- uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
- uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
+ vk::DescriptorSet::Bindings const &descriptorSets,
+ vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
+ PushConstantStorage const &pushConstants,
+ uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+ uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
auto &modes = shader->getModes();
@@ -231,8 +227,7 @@
for(uint32_t batchID = 0; batchID < batchCount && batchID < groupCount; batchID++)
{
wg.add(1);
- marl::schedule([=, &data]
- {
+ marl::schedule([=, &data] {
defer(wg.done());
std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 75f0cf4..9b93f4e 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -17,13 +17,15 @@
#include "SpirvShader.hpp"
-#include "Reactor/Coroutine.hpp"
#include "Device/Context.hpp"
+#include "Reactor/Coroutine.hpp"
#include "Vulkan/VkDescriptorSet.hpp"
#include <functional>
-namespace vk { class PipelineLayout; }
+namespace vk {
+class PipelineLayout;
+}
namespace sw {
@@ -34,13 +36,13 @@
// ComputeProgram builds a SPIR-V compute shader.
class ComputeProgram : public Coroutine<SpirvShader::YieldResult(
- void* data,
- int32_t workgroupX,
- int32_t workgroupY,
- int32_t workgroupZ,
- void* workgroupMemory,
- int32_t firstSubgroup,
- int32_t subgroupCount)>
+ void *data,
+ int32_t workgroupX,
+ int32_t workgroupY,
+ int32_t workgroupZ,
+ void *workgroupMemory,
+ int32_t firstSubgroup,
+ int32_t subgroupCount)>
{
public:
ComputeProgram(SpirvShader const *spirvShader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets);
@@ -52,35 +54,35 @@
// run executes the compute shader routine for all workgroups.
void run(
- vk::DescriptorSet::Bindings const &descriptorSetBindings,
- vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
- PushConstantStorage const &pushConstants,
- uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
- uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+ vk::DescriptorSet::Bindings const &descriptorSetBindings,
+ vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
+ PushConstantStorage const &pushConstants,
+ uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+ uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
protected:
- void emit(SpirvRoutine* routine);
- void setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3]);
- void setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex);
+ void emit(SpirvRoutine *routine);
+ void setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3]);
+ void setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex);
struct Data
{
vk::DescriptorSet::Bindings descriptorSets;
vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets;
- uint4 numWorkgroups; // [x, y, z, 0]
- uint4 workgroupSize; // [x, y, z, 0]
- uint32_t invocationsPerSubgroup; // SPIR-V: "SubgroupSize"
- uint32_t subgroupsPerWorkgroup; // SPIR-V: "NumSubgroups"
- uint32_t invocationsPerWorkgroup; // Total number of invocations per workgroup.
+ uint4 numWorkgroups; // [x, y, z, 0]
+ uint4 workgroupSize; // [x, y, z, 0]
+ uint32_t invocationsPerSubgroup; // SPIR-V: "SubgroupSize"
+ uint32_t subgroupsPerWorkgroup; // SPIR-V: "NumSubgroups"
+ uint32_t invocationsPerWorkgroup; // Total number of invocations per workgroup.
PushConstantStorage pushConstants;
const Constants *constants;
};
- SpirvShader const * const shader;
- vk::PipelineLayout const * const pipelineLayout;
+ SpirvShader const *const shader;
+ vk::PipelineLayout const *const pipelineLayout;
const vk::DescriptorSet::Bindings &descriptorSets;
};
} // namespace sw
-#endif // sw_ComputeProgram_hpp
+#endif // sw_ComputeProgram_hpp
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index 6eaf92d..1f98769 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -14,8 +14,8 @@
#include "Constants.hpp"
-#include "System/Math.hpp"
#include "System/Half.hpp"
+#include "System/Math.hpp"
#include <cstring>
@@ -25,8 +25,7 @@
Constants::Constants()
{
- static const unsigned int transposeBit0[16] =
- {
+ static const unsigned int transposeBit0[16] = {
0x00000000,
0x00000001,
0x00000010,
@@ -45,8 +44,7 @@
0x00001111
};
- static const unsigned int transposeBit1[16] =
- {
+ static const unsigned int transposeBit1[16] = {
0x00000000,
0x00000002,
0x00000020,
@@ -65,8 +63,7 @@
0x00002222
};
- static const unsigned int transposeBit2[16] =
- {
+ static const unsigned int transposeBit2[16] = {
0x00000000,
0x00000004,
0x00000040,
@@ -89,74 +86,71 @@
memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1));
memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2));
- static const ushort4 cWeight[17] =
- {
- {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF
- {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF
- {0x8000, 0x8000, 0x8000, 0x8000}, // 0xFFFF / 2 = 0x8000
- {0x5555, 0x5555, 0x5555, 0x5555}, // 0xFFFF / 3 = 0x5555
- {0x4000, 0x4000, 0x4000, 0x4000}, // 0xFFFF / 4 = 0x4000
- {0x3333, 0x3333, 0x3333, 0x3333}, // 0xFFFF / 5 = 0x3333
- {0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA}, // 0xFFFF / 6 = 0x2AAA
- {0x2492, 0x2492, 0x2492, 0x2492}, // 0xFFFF / 7 = 0x2492
- {0x2000, 0x2000, 0x2000, 0x2000}, // 0xFFFF / 8 = 0x2000
- {0x1C71, 0x1C71, 0x1C71, 0x1C71}, // 0xFFFF / 9 = 0x1C71
- {0x1999, 0x1999, 0x1999, 0x1999}, // 0xFFFF / 10 = 0x1999
- {0x1745, 0x1745, 0x1745, 0x1745}, // 0xFFFF / 11 = 0x1745
- {0x1555, 0x1555, 0x1555, 0x1555}, // 0xFFFF / 12 = 0x1555
- {0x13B1, 0x13B1, 0x13B1, 0x13B1}, // 0xFFFF / 13 = 0x13B1
- {0x1249, 0x1249, 0x1249, 0x1249}, // 0xFFFF / 14 = 0x1249
- {0x1111, 0x1111, 0x1111, 0x1111}, // 0xFFFF / 15 = 0x1111
- {0x1000, 0x1000, 0x1000, 0x1000}, // 0xFFFF / 16 = 0x1000
+ static const ushort4 cWeight[17] = {
+ { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, // 0xFFFF / 1 = 0xFFFF
+ { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, // 0xFFFF / 1 = 0xFFFF
+ { 0x8000, 0x8000, 0x8000, 0x8000 }, // 0xFFFF / 2 = 0x8000
+ { 0x5555, 0x5555, 0x5555, 0x5555 }, // 0xFFFF / 3 = 0x5555
+ { 0x4000, 0x4000, 0x4000, 0x4000 }, // 0xFFFF / 4 = 0x4000
+ { 0x3333, 0x3333, 0x3333, 0x3333 }, // 0xFFFF / 5 = 0x3333
+ { 0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA }, // 0xFFFF / 6 = 0x2AAA
+ { 0x2492, 0x2492, 0x2492, 0x2492 }, // 0xFFFF / 7 = 0x2492
+ { 0x2000, 0x2000, 0x2000, 0x2000 }, // 0xFFFF / 8 = 0x2000
+ { 0x1C71, 0x1C71, 0x1C71, 0x1C71 }, // 0xFFFF / 9 = 0x1C71
+ { 0x1999, 0x1999, 0x1999, 0x1999 }, // 0xFFFF / 10 = 0x1999
+ { 0x1745, 0x1745, 0x1745, 0x1745 }, // 0xFFFF / 11 = 0x1745
+ { 0x1555, 0x1555, 0x1555, 0x1555 }, // 0xFFFF / 12 = 0x1555
+ { 0x13B1, 0x13B1, 0x13B1, 0x13B1 }, // 0xFFFF / 13 = 0x13B1
+ { 0x1249, 0x1249, 0x1249, 0x1249 }, // 0xFFFF / 14 = 0x1249
+ { 0x1111, 0x1111, 0x1111, 0x1111 }, // 0xFFFF / 15 = 0x1111
+ { 0x1000, 0x1000, 0x1000, 0x1000 }, // 0xFFFF / 16 = 0x1000
};
- static const float4 uvWeight[17] =
- {
- {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f},
- {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f},
- {1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f},
- {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f},
- {1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f},
- {1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f},
- {1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f},
- {1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f},
- {1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f},
- {1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f},
- {1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f},
- {1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f},
- {1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f},
- {1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f},
- {1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f},
- {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f},
- {1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f},
+ static const float4 uvWeight[17] = {
+ { 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f },
+ { 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f },
+ { 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f },
+ { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f },
+ { 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f },
+ { 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f },
+ { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f },
+ { 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f },
+ { 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f },
+ { 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f },
+ { 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f },
+ { 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f },
+ { 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f },
+ { 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f },
+ { 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f },
+ { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f },
+ { 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f },
};
- static const float4 uvStart[17] =
- {
- {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f},
- {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f},
- {-1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f},
- {-2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f},
- {-3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f},
- {-4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f},
- {-5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f},
- {-6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f},
- {-7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f},
- {-8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f},
- {-9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f},
- {-10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f},
- {-11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f},
- {-12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f},
- {-13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f},
- {-14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f},
- {-15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f},
+ static const float4 uvStart[17] = {
+ { -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f },
+ { -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f },
+ { -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f },
+ { -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f },
+ { -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f },
+ { -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f },
+ { -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f },
+ { -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f },
+ { -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f },
+ { -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f },
+ { -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f },
+ { -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f },
+ { -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f },
+ { -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f },
+ { -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f },
+ { -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f },
+ { -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f },
};
memcpy(&this->cWeight, cWeight, sizeof(cWeight));
memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight));
memcpy(&this->uvStart, uvStart, sizeof(uvStart));
- static const unsigned int occlusionCount[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
+ static const unsigned int occlusionCount[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount));
@@ -253,24 +247,24 @@
for(int i = 0; i < 4; i++)
{
- maskW01Q[i][0] = -(i >> 0 & 1);
- maskW01Q[i][1] = -(i >> 1 & 1);
- maskW01Q[i][2] = -(i >> 0 & 1);
- maskW01Q[i][3] = -(i >> 1 & 1);
+ maskW01Q[i][0] = -(i >> 0 & 1);
+ maskW01Q[i][1] = -(i >> 1 & 1);
+ maskW01Q[i][2] = -(i >> 0 & 1);
+ maskW01Q[i][3] = -(i >> 1 & 1);
- maskD01X[i][0] = -(i >> 0 & 1);
- maskD01X[i][1] = -(i >> 1 & 1);
- maskD01X[i][2] = -(i >> 0 & 1);
- maskD01X[i][3] = -(i >> 1 & 1);
+ maskD01X[i][0] = -(i >> 0 & 1);
+ maskD01X[i][1] = -(i >> 1 & 1);
+ maskD01X[i][2] = -(i >> 0 & 1);
+ maskD01X[i][3] = -(i >> 1 & 1);
}
for(int i = 0; i < 16; i++)
{
mask10Q[i][0] = mask10Q[i][1] =
- (i & 0x1 ? 0x3FF : 0) |
- (i & 0x2 ? 0xFFC00 : 0) |
- (i & 0x4 ? 0x3FF00000 : 0) |
- (i & 0x8 ? 0xC0000000 : 0);
+ (i & 0x1 ? 0x3FF : 0) |
+ (i & 0x2 ? 0xFFC00 : 0) |
+ (i & 0x4 ? 0x3FF00000 : 0) |
+ (i & 0x8 ? 0xC0000000 : 0);
}
for(int i = 0; i < 256; i++)
@@ -327,13 +321,13 @@
memcpy(&this->X, &X, sizeof(X));
memcpy(&this->Y, &Y, sizeof(Y));
- const dword maxX[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101};
- const dword maxY[16] = {0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202};
- const dword maxZ[16] = {0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404};
- const dword minX[16] = {0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808};
- const dword minY[16] = {0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010};
- const dword minZ[16] = {0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020};
- const dword fini[16] = {0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080};
+ const dword maxX[16] = { 0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101 };
+ const dword maxY[16] = { 0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202 };
+ const dword maxZ[16] = { 0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404 };
+ const dword minX[16] = { 0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808 };
+ const dword minY[16] = { 0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010 };
+ const dword minZ[16] = { 0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020 };
+ const dword fini[16] = { 0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080 };
memcpy(&this->maxX, &maxX, sizeof(maxX));
memcpy(&this->maxY, &maxY, sizeof(maxY));
@@ -343,17 +337,17 @@
memcpy(&this->minZ, &minZ, sizeof(minZ));
memcpy(&this->fini, &fini, sizeof(fini));
- static const dword4 maxPos = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE};
+ static const dword4 maxPos = { 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE };
memcpy(&this->maxPos, &maxPos, sizeof(maxPos));
- static const float4 unscaleByte = {1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF};
- static const float4 unscaleSByte = {1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F};
- static const float4 unscaleShort = {1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF};
- static const float4 unscaleUShort = {1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF};
- static const float4 unscaleInt = {1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF};
- static const float4 unscaleUInt = {1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF};
- static const float4 unscaleFixed = {1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000};
+ static const float4 unscaleByte = { 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF };
+ static const float4 unscaleSByte = { 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F };
+ static const float4 unscaleShort = { 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF };
+ static const float4 unscaleUShort = { 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF };
+ static const float4 unscaleInt = { 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF };
+ static const float4 unscaleUInt = { 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF };
+ static const float4 unscaleFixed = { 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000 };
memcpy(&this->unscaleByte, &unscaleByte, sizeof(unscaleByte));
memcpy(&this->unscaleSByte, &unscaleSByte, sizeof(unscaleSByte));
@@ -365,7 +359,7 @@
for(int i = 0; i <= 0xFFFF; i++)
{
- half2float[i] = (float)reinterpret_cast<half&>(i);
+ half2float[i] = (float)reinterpret_cast<half &>(i);
}
}
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index 484682b..c9c8a1f 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -15,8 +15,8 @@
#ifndef sw_Constants_hpp
#define sw_Constants_hpp
-#include "System/Types.hpp"
#include "System/Math.hpp"
+#include "System/Types.hpp"
#include "Vulkan/VkConfig.h"
namespace sw {
@@ -68,8 +68,8 @@
word4 maskW01Q[4];
dword4 maskD01X[4];
word4 mask565Q[8];
- dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
- word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
+ dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
+ word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
unsigned short sRGBtoLinear8_16[256];
@@ -91,10 +91,10 @@
// VK_SAMPLE_COUNT_4_BIT
// https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling
static constexpr float VkSampleLocations4[][2] = {
- {0.375, 0.125},
- {0.875, 0.375},
- {0.125, 0.625},
- {0.625, 0.875},
+ { 0.375, 0.125 },
+ { 0.875, 0.375 },
+ { 0.125, 0.625 },
+ { 0.625, 0.875 },
};
// Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down.
@@ -141,6 +141,6 @@
extern Constants constants;
-} // namepsace sw
+} // namespace sw
-#endif // sw_Constants_hpp
+#endif // sw_Constants_hpp
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 332f82e..21296ed 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -56,48 +56,43 @@
return mask;
}
-void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
+void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4])
{
routine.setImmutableInputBuiltins(spirvShader);
- routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID)))));
});
- routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
assert(builtin.SizeInComponents == 4);
- value[builtin.FirstComponent+0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f);
- value[builtin.FirstComponent+1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f);
- value[builtin.FirstComponent+2] = z[0]; // sample 0
- value[builtin.FirstComponent+3] = w;
+ value[builtin.FirstComponent + 0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f);
+ value[builtin.FirstComponent + 1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f);
+ value[builtin.FirstComponent + 2] = z[0]; // sample 0
+ value[builtin.FirstComponent + 3] = w;
});
- routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
assert(builtin.SizeInComponents == 2);
- value[builtin.FirstComponent+0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) +
- SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX))));
- value[builtin.FirstComponent+1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) +
- SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY))));
+ value[builtin.FirstComponent + 0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) +
+ SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX))));
+ value[builtin.FirstComponent + 1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) +
+ SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY))));
});
- routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
});
- routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask));
});
- routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1);
- routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1);
+ routine.windowSpacePosition[0] = x + SIMD::Int(0, 1, 0, 1);
+ routine.windowSpacePosition[1] = y + SIMD::Int(0, 0, 1, 1);
routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
}
@@ -169,7 +164,7 @@
for(auto i = 0u; i < state.multiSample; i++)
{
- cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1<<i), SIMD::Int(0)));
+ cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << i), SIMD::Int(0)));
}
}
@@ -211,78 +206,78 @@
auto format = state.targetFormat[index];
switch(format)
{
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R16G16_UNORM:
- case VK_FORMAT_R16G16B16A16_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- for(unsigned int q = 0; q < state.multiSample; q++)
- {
- if(state.multiSampleMask & (1 << q))
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R16G16_UNORM:
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ for(unsigned int q = 0; q < state.multiSample; q++)
{
- Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
- Vector4s color;
+ if(state.multiSampleMask & (1 << q))
+ {
+ Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+ Vector4s color;
- color.x = convertFixed16(c[index].x, false);
- color.y = convertFixed16(c[index].y, false);
- color.z = convertFixed16(c[index].z, false);
- color.w = convertFixed16(c[index].w, false);
+ color.x = convertFixed16(c[index].x, false);
+ color.y = convertFixed16(c[index].y, false);
+ color.z = convertFixed16(c[index].z, false);
+ color.w = convertFixed16(c[index].w, false);
- alphaBlend(index, buffer, color, x);
- writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ alphaBlend(index, buffer, color, x);
+ writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ }
}
- }
- break;
- case VK_FORMAT_R16_SFLOAT:
- case VK_FORMAT_R16G16_SFLOAT:
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_R32G32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32_UINT:
- case VK_FORMAT_R32G32_UINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- case VK_FORMAT_R16_SINT:
- case VK_FORMAT_R16G16_SINT:
- case VK_FORMAT_R16G16B16A16_SINT:
- case VK_FORMAT_R16_UINT:
- case VK_FORMAT_R16G16_UINT:
- case VK_FORMAT_R16G16B16A16_UINT:
- case VK_FORMAT_R8_SINT:
- case VK_FORMAT_R8G8_SINT:
- case VK_FORMAT_R8G8B8A8_SINT:
- case VK_FORMAT_R8_UINT:
- case VK_FORMAT_R8G8_UINT:
- case VK_FORMAT_R8G8B8A8_UINT:
- case VK_FORMAT_A8B8G8R8_UINT_PACK32:
- case VK_FORMAT_A8B8G8R8_SINT_PACK32:
- case VK_FORMAT_A2B10G10R10_UINT_PACK32:
- for(unsigned int q = 0; q < state.multiSample; q++)
- {
- if(state.multiSampleMask & (1 << q))
+ break;
+ case VK_FORMAT_R16_SFLOAT:
+ case VK_FORMAT_R16G16_SFLOAT:
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32_UINT:
+ case VK_FORMAT_R32G32_UINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ case VK_FORMAT_R16_SINT:
+ case VK_FORMAT_R16G16_SINT:
+ case VK_FORMAT_R16G16B16A16_SINT:
+ case VK_FORMAT_R16_UINT:
+ case VK_FORMAT_R16G16_UINT:
+ case VK_FORMAT_R16G16B16A16_UINT:
+ case VK_FORMAT_R8_SINT:
+ case VK_FORMAT_R8G8_SINT:
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_R8_UINT:
+ case VK_FORMAT_R8G8_UINT:
+ case VK_FORMAT_R8G8B8A8_UINT:
+ case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+ case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+ for(unsigned int q = 0; q < state.multiSample; q++)
{
- Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
- Vector4f color = c[index];
+ if(state.multiSampleMask & (1 << q))
+ {
+ Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+ Vector4f color = c[index];
- alphaBlend(index, buffer, color, x);
- writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ alphaBlend(index, buffer, color, x);
+ writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ }
}
- }
- break;
- default:
- UNIMPLEMENTED("VkFormat: %d", int(format));
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(format));
}
}
}
@@ -298,62 +293,66 @@
switch(state.targetFormat[index])
{
- case VK_FORMAT_UNDEFINED:
- break;
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R16G16_UNORM:
- case VK_FORMAT_R16G16B16A16_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
- oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
- oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
- oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
- break;
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_R32G32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32_UINT:
- case VK_FORMAT_R32G32_UINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- case VK_FORMAT_R16_SFLOAT:
- case VK_FORMAT_R16G16_SFLOAT:
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- case VK_FORMAT_R16_SINT:
- case VK_FORMAT_R16G16_SINT:
- case VK_FORMAT_R16G16B16A16_SINT:
- case VK_FORMAT_R16_UINT:
- case VK_FORMAT_R16G16_UINT:
- case VK_FORMAT_R16G16B16A16_UINT:
- case VK_FORMAT_R8_SINT:
- case VK_FORMAT_R8G8_SINT:
- case VK_FORMAT_R8G8B8A8_SINT:
- case VK_FORMAT_R8_UINT:
- case VK_FORMAT_R8G8_UINT:
- case VK_FORMAT_R8G8B8A8_UINT:
- case VK_FORMAT_A8B8G8R8_UINT_PACK32:
- case VK_FORMAT_A8B8G8R8_SINT_PACK32:
- case VK_FORMAT_A2B10G10R10_UINT_PACK32:
- break;
- default:
- UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+ case VK_FORMAT_UNDEFINED:
+ break;
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R16G16_UNORM:
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ oC[index].x = Max(oC[index].x, Float4(0.0f));
+ oC[index].x = Min(oC[index].x, Float4(1.0f));
+ oC[index].y = Max(oC[index].y, Float4(0.0f));
+ oC[index].y = Min(oC[index].y, Float4(1.0f));
+ oC[index].z = Max(oC[index].z, Float4(0.0f));
+ oC[index].z = Min(oC[index].z, Float4(1.0f));
+ oC[index].w = Max(oC[index].w, Float4(0.0f));
+ oC[index].w = Min(oC[index].w, Float4(1.0f));
+ break;
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32_UINT:
+ case VK_FORMAT_R32G32_UINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ case VK_FORMAT_R16_SFLOAT:
+ case VK_FORMAT_R16G16_SFLOAT:
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case VK_FORMAT_R16_SINT:
+ case VK_FORMAT_R16G16_SINT:
+ case VK_FORMAT_R16G16B16A16_SINT:
+ case VK_FORMAT_R16_UINT:
+ case VK_FORMAT_R16G16_UINT:
+ case VK_FORMAT_R16G16B16A16_UINT:
+ case VK_FORMAT_R8_SINT:
+ case VK_FORMAT_R8G8_SINT:
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_R8_UINT:
+ case VK_FORMAT_R8G8_UINT:
+ case VK_FORMAT_R8G8B8A8_UINT:
+ case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+ case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
}
}
}
-Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
+Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
{
Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
@@ -361,4 +360,4 @@
return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
}
-} // namepsace sw
+} // namespace sw
diff --git a/src/Pipeline/PixelProgram.hpp b/src/Pipeline/PixelProgram.hpp
index 7888115..59994fb 100644
--- a/src/Pipeline/PixelProgram.hpp
+++ b/src/Pipeline/PixelProgram.hpp
@@ -23,18 +23,18 @@
{
public:
PixelProgram(
- const PixelProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader,
- const vk::DescriptorSet::Bindings &descriptorSets) :
- PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
+ const PixelProcessor::State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader,
+ const vk::DescriptorSet::Bindings &descriptorSets)
+ : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
{
}
virtual ~PixelProgram() {}
protected:
- virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]);
+ virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]);
virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]);
virtual Bool alphaTest(Int cMask[4]);
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 6b06146..b2ae9d0 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -14,24 +14,24 @@
#include "PixelRoutine.hpp"
-#include "SamplerCore.hpp"
#include "Constants.hpp"
-#include "Device/Renderer.hpp"
-#include "Device/QuadRasterizer.hpp"
+#include "SamplerCore.hpp"
#include "Device/Primitive.hpp"
+#include "Device/QuadRasterizer.hpp"
+#include "Device/Renderer.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
namespace sw {
PixelRoutine::PixelRoutine(
- const PixelProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader,
- const vk::DescriptorSet::Bindings &descriptorSets)
- : QuadRasterizer(state, spirvShader),
- routine(pipelineLayout),
- descriptorSets(descriptorSets)
+ const PixelProcessor::State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader,
+ const vk::DescriptorSet::Bindings &descriptorSets)
+ : QuadRasterizer(state, spirvShader)
+ , routine(pipelineLayout)
+ , descriptorSets(descriptorSets)
{
if(spirvShader)
{
@@ -55,8 +55,8 @@
// TODO: consider shader which modifies sample mask in general
const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
- Int zMask[4]; // Depth mask
- Int sMask[4]; // Stencil mask
+ Int zMask[4]; // Depth mask
+ Int sMask[4]; // Stencil mask
for(unsigned int q = 0; q < state.multiSample; q++)
{
@@ -72,7 +72,7 @@
Float4 f;
Float4 rhwCentroid;
- Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
+ Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
if(interpolateZ())
{
@@ -82,10 +82,10 @@
if(state.multiSample > 1)
{
- x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
+ x -= *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4));
}
- z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
+ z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false, state.depthClamp);
}
}
@@ -101,7 +101,7 @@
If(depthPass || Bool(!earlyDepthTest))
{
- Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
+ Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
// Centroid locations
Float4 XXXX = Float4(0.0f);
@@ -113,9 +113,9 @@
for(unsigned int q = 0; q < state.multiSample; q++)
{
- XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
- YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
- WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
+ XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]);
+ YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
+ WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
}
WWWW = Rcp_pp(WWWW);
@@ -128,12 +128,12 @@
if(interpolateW())
{
- w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
+ w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false, false);
rhw = reciprocal(w, false, false, true);
if(state.centroid)
{
- rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
+ rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false));
}
}
@@ -147,16 +147,16 @@
if(input.Centroid && state.multiSample > 1)
{
routine.inputs[interpolant] =
- interpolateCentroid(XXXX, YYYY, rhwCentroid,
- primitive + OFFSET(Primitive, V[interpolant]),
- input.Flat, !input.NoPerspective);
+ interpolateCentroid(XXXX, YYYY, rhwCentroid,
+ primitive + OFFSET(Primitive, V[interpolant]),
+ input.Flat, !input.NoPerspective);
}
else
{
routine.inputs[interpolant] =
- interpolate(xxxx, Dv[interpolant], rhw,
- primitive + OFFSET(Primitive, V[interpolant]),
- input.Flat, !input.NoPerspective, false);
+ interpolate(xxxx, Dv[interpolant], rhw,
+ primitive + OFFSET(Primitive, V[interpolant]),
+ input.Flat, !input.NoPerspective, false);
}
}
}
@@ -166,8 +166,8 @@
for(uint32_t i = 0; i < state.numClipDistances; i++)
{
auto distance = interpolate(xxxx, DclipDistance[i], rhw,
- primitive + OFFSET(Primitive, clipDistance[i]),
- false, true, false);
+ primitive + OFFSET(Primitive, clipDistance[i]),
+ false, true, false);
auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0)));
for(auto ms = 0u; ms < state.multiSample; ms++)
@@ -202,9 +202,9 @@
if(i < it->second.SizeInComponents)
{
routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
- interpolate(xxxx, DcullDistance[i], rhw,
- primitive + OFFSET(Primitive, cullDistance[i]),
- false, true, false);
+ interpolate(xxxx, DcullDistance[i], rhw,
+ primitive + OFFSET(Primitive, cullDistance[i]),
+ false, true, false);
}
}
}
@@ -250,7 +250,7 @@
if(state.occlusionEnabled)
{
- occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
+ occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q]));
}
}
}
@@ -271,12 +271,12 @@
Float4 PixelRoutine::interpolateCentroid(const Float4 &x, const Float4 &y, const Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
{
- Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
+ Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, C), 16);
if(!flat)
{
- interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
- y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
+ interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16) +
+ y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, B), 16);
if(perspective)
{
@@ -300,7 +300,7 @@
if(q > 0)
{
- buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
+ buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
}
Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
@@ -310,20 +310,20 @@
if(state.frontStencil.compareMask != 0xff)
{
- value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
+ value &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].testMaskQ));
}
stencilTest(value, state.frontStencil.compareOp, false);
if(state.backStencil.compareMask != 0xff)
{
- valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
+ valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].testMaskQ));
}
stencilTest(valueBack, state.backStencil.compareOp, true);
- value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
- valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
+ value &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
+ valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
value |= valueBack;
sMask = SignMask(value) & cMask;
@@ -335,43 +335,43 @@
switch(stencilCompareMode)
{
- case VK_COMPARE_OP_ALWAYS:
- value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- break;
- case VK_COMPARE_OP_NEVER:
- value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
- break;
- case VK_COMPARE_OP_LESS: // a < b ~ b > a
- value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
- value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
- break;
- case VK_COMPARE_OP_EQUAL:
- value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
- break;
- case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
- value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
- value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- break;
- case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b)
- equal = value;
- equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
- value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
- value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
- value |= equal;
- break;
- case VK_COMPARE_OP_GREATER: // a > b
- equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ));
- value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
- equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
- value = equal;
- break;
- case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a)
- value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
- value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
- value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- break;
- default:
- UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode));
+ case VK_COMPARE_OP_ALWAYS:
+ value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ break;
+ case VK_COMPARE_OP_NEVER:
+ value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ break;
+ case VK_COMPARE_OP_LESS: // a < b ~ b > a
+ value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+ value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
+ break;
+ case VK_COMPARE_OP_EQUAL:
+ value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
+ break;
+ case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
+ value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
+ value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ break;
+ case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b)
+ equal = value;
+ equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
+ value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+ value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
+ value |= equal;
+ break;
+ case VK_COMPARE_OP_GREATER: // a > b
+ equal = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ));
+ value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+ equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
+ value = equal;
+ break;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a)
+ value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+ value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
+ value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ break;
+ default:
+ UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode));
}
}
@@ -385,11 +385,11 @@
}
Pointer<Byte> buffer = zBuffer + 4 * x;
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+ Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
if(q > 0)
{
- buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+ buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
}
Float4 zValue;
@@ -405,45 +405,45 @@
switch(state.depthCompareMode)
{
- case VK_COMPARE_OP_ALWAYS:
- // Optimized
- break;
- case VK_COMPARE_OP_NEVER:
- // Optimized
- break;
- case VK_COMPARE_OP_EQUAL:
- zTest = CmpEQ(zValue, Z);
- break;
- case VK_COMPARE_OP_NOT_EQUAL:
- zTest = CmpNEQ(zValue, Z);
- break;
- case VK_COMPARE_OP_LESS:
- zTest = CmpNLE(zValue, Z);
- break;
- case VK_COMPARE_OP_GREATER_OR_EQUAL:
- zTest = CmpLE(zValue, Z);
- break;
- case VK_COMPARE_OP_LESS_OR_EQUAL:
- zTest = CmpNLT(zValue, Z);
- break;
- case VK_COMPARE_OP_GREATER:
- zTest = CmpLT(zValue, Z);
- break;
- default:
- UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
+ case VK_COMPARE_OP_ALWAYS:
+ // Optimized
+ break;
+ case VK_COMPARE_OP_NEVER:
+ // Optimized
+ break;
+ case VK_COMPARE_OP_EQUAL:
+ zTest = CmpEQ(zValue, Z);
+ break;
+ case VK_COMPARE_OP_NOT_EQUAL:
+ zTest = CmpNEQ(zValue, Z);
+ break;
+ case VK_COMPARE_OP_LESS:
+ zTest = CmpNLE(zValue, Z);
+ break;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL:
+ zTest = CmpLE(zValue, Z);
+ break;
+ case VK_COMPARE_OP_LESS_OR_EQUAL:
+ zTest = CmpNLT(zValue, Z);
+ break;
+ case VK_COMPARE_OP_GREATER:
+ zTest = CmpLT(zValue, Z);
+ break;
+ default:
+ UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
}
switch(state.depthCompareMode)
{
- case VK_COMPARE_OP_ALWAYS:
- zMask = cMask;
- break;
- case VK_COMPARE_OP_NEVER:
- zMask = 0x0;
- break;
- default:
- zMask = SignMask(zTest) & cMask;
- break;
+ case VK_COMPARE_OP_ALWAYS:
+ zMask = cMask;
+ break;
+ case VK_COMPARE_OP_NEVER:
+ zMask = 0x0;
+ break;
+ default:
+ zMask = SignMask(zTest) & cMask;
+ break;
}
if(state.stencilActive)
@@ -468,7 +468,7 @@
if(q > 0)
{
- buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+ buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
}
Short4 zValue;
@@ -488,45 +488,45 @@
switch(state.depthCompareMode)
{
- case VK_COMPARE_OP_ALWAYS:
- // Optimized
- break;
- case VK_COMPARE_OP_NEVER:
- // Optimized
- break;
- case VK_COMPARE_OP_EQUAL:
- zTest = Int4(CmpEQ(zValue, Z));
- break;
- case VK_COMPARE_OP_NOT_EQUAL:
- zTest = ~Int4(CmpEQ(zValue, Z));
- break;
- case VK_COMPARE_OP_LESS:
- zTest = Int4(CmpGT(zValue, Z));
- break;
- case VK_COMPARE_OP_GREATER_OR_EQUAL:
- zTest = ~Int4(CmpGT(zValue, Z));
- break;
- case VK_COMPARE_OP_LESS_OR_EQUAL:
- zTest = ~Int4(CmpGT(Z, zValue));
- break;
- case VK_COMPARE_OP_GREATER:
- zTest = Int4(CmpGT(Z, zValue));
- break;
- default:
- UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
+ case VK_COMPARE_OP_ALWAYS:
+ // Optimized
+ break;
+ case VK_COMPARE_OP_NEVER:
+ // Optimized
+ break;
+ case VK_COMPARE_OP_EQUAL:
+ zTest = Int4(CmpEQ(zValue, Z));
+ break;
+ case VK_COMPARE_OP_NOT_EQUAL:
+ zTest = ~Int4(CmpEQ(zValue, Z));
+ break;
+ case VK_COMPARE_OP_LESS:
+ zTest = Int4(CmpGT(zValue, Z));
+ break;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL:
+ zTest = ~Int4(CmpGT(zValue, Z));
+ break;
+ case VK_COMPARE_OP_LESS_OR_EQUAL:
+ zTest = ~Int4(CmpGT(Z, zValue));
+ break;
+ case VK_COMPARE_OP_GREATER:
+ zTest = Int4(CmpGT(Z, zValue));
+ break;
+ default:
+ UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
}
switch(state.depthCompareMode)
{
- case VK_COMPARE_OP_ALWAYS:
- zMask = cMask;
- break;
- case VK_COMPARE_OP_NEVER:
- zMask = 0x0;
- break;
- default:
- zMask = SignMask(zTest) & cMask;
- break;
+ case VK_COMPARE_OP_ALWAYS:
+ zMask = cMask;
+ break;
+ case VK_COMPARE_OP_NEVER:
+ zMask = 0x0;
+ break;
+ default:
+ zMask = SignMask(zTest) & cMask;
+ break;
}
if(state.stencilActive)
@@ -552,10 +552,10 @@
void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha)
{
- Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
- Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
- Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
- Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
+ Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c0)));
+ Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c1)));
+ Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c2)));
+ Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c3)));
Int aMask0 = SignMask(coverage0);
Int aMask1 = SignMask(coverage1);
@@ -578,11 +578,11 @@
}
Pointer<Byte> buffer = zBuffer + 4 * x;
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+ Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
if(q > 0)
{
- buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+ buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
}
Float4 zValue;
@@ -594,8 +594,8 @@
zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
}
- Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
- zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
+ Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16));
+ zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16));
Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
// FIXME: Properly optimizes?
@@ -613,11 +613,11 @@
}
Pointer<Byte> buffer = zBuffer + 2 * x;
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+ Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
if(q > 0)
{
- buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+ buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
}
Short4 zValue;
@@ -629,15 +629,15 @@
zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
}
- Z = Z & *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q) + zMask * 8, 8);
- zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q) + zMask * 8, 8);
+ Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8);
+ zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8);
Z = Z | zValue;
// FIXME: Properly optimizes?
*Pointer<Short>(buffer) = Extract(Z, 0);
- *Pointer<Short>(buffer+2) = Extract(Z, 1);
- *Pointer<Short>(buffer+pitch) = Extract(Z, 2);
- *Pointer<Short>(buffer+pitch+2) = Extract(Z, 3);
+ *Pointer<Short>(buffer + 2) = Extract(Z, 1);
+ *Pointer<Short>(buffer + pitch) = Extract(Z, 2);
+ *Pointer<Short>(buffer + pitch + 2) = Extract(Z, 3);
}
void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
@@ -677,7 +677,7 @@
if(q > 0)
{
- buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
+ buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
}
Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
@@ -686,11 +686,11 @@
Byte8 newValue;
stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask);
- if((state.frontStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
+ if((state.frontStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
{
Byte8 maskedValue = bufferValue;
- newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
- maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
+ newValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].writeMaskQ));
+ maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].invWriteMaskQ));
newValue |= maskedValue;
}
@@ -698,20 +698,20 @@
stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask, sMask);
- if((state.backStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
+ if((state.backStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
{
Byte8 maskedValue = bufferValue;
- newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
- maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
+ newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].writeMaskQ));
+ maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].invWriteMaskQ));
newValueBack |= maskedValue;
}
- newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
- newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
+ newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
+ newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
newValue |= newValueBack;
- newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
- bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
+ newValue &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * cMask);
+ bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * cMask);
newValue |= bufferValue;
*Pointer<Short>(buffer) = Extract(As<Short4>(newValue), 0);
@@ -738,15 +738,15 @@
if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
{
- if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same
+ if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same
{
- pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
- zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
+ pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * zMask);
+ zFail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * zMask);
pass |= zFail;
}
- pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
- fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
+ pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * sMask);
+ fail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * sMask);
pass |= fail;
}
}
@@ -755,32 +755,32 @@
{
switch(operation)
{
- case VK_STENCIL_OP_KEEP:
- output = bufferValue;
- break;
- case VK_STENCIL_OP_ZERO:
- output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
- break;
- case VK_STENCIL_OP_REPLACE:
- output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceQ));
- break;
- case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
- output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
- break;
- case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
- output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
- break;
- case VK_STENCIL_OP_INVERT:
- output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- break;
- case VK_STENCIL_OP_INCREMENT_AND_WRAP:
- output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
- break;
- case VK_STENCIL_OP_DECREMENT_AND_WRAP:
- output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
- break;
- default:
- UNIMPLEMENTED("VkStencilOp: %d", int(operation));
+ case VK_STENCIL_OP_KEEP:
+ output = bufferValue;
+ break;
+ case VK_STENCIL_OP_ZERO:
+ output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ break;
+ case VK_STENCIL_OP_REPLACE:
+ output = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
+ break;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+ output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
+ break;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+ output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
+ break;
+ case VK_STENCIL_OP_INVERT:
+ output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ break;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+ output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
+ break;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+ output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
+ break;
+ default:
+ UNIMPLEMENTED("VkStencilOp: %d", int(operation));
}
}
@@ -788,80 +788,80 @@
{
switch(blendFactorActive)
{
- case VK_BLEND_FACTOR_ZERO:
- // Optimized
- break;
- case VK_BLEND_FACTOR_ONE:
- // Optimized
- break;
- case VK_BLEND_FACTOR_SRC_COLOR:
- blendFactor.x = current.x;
- blendFactor.y = current.y;
- blendFactor.z = current.z;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- blendFactor.x = Short4(0xFFFFu) - current.x;
- blendFactor.y = Short4(0xFFFFu) - current.y;
- blendFactor.z = Short4(0xFFFFu) - current.z;
- break;
- case VK_BLEND_FACTOR_DST_COLOR:
- blendFactor.x = pixel.x;
- blendFactor.y = pixel.y;
- blendFactor.z = pixel.z;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- blendFactor.x = Short4(0xFFFFu) - pixel.x;
- blendFactor.y = Short4(0xFFFFu) - pixel.y;
- blendFactor.z = Short4(0xFFFFu) - pixel.z;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- blendFactor.x = current.w;
- blendFactor.y = current.w;
- blendFactor.z = current.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- blendFactor.x = Short4(0xFFFFu) - current.w;
- blendFactor.y = Short4(0xFFFFu) - current.w;
- blendFactor.z = Short4(0xFFFFu) - current.w;
- break;
- case VK_BLEND_FACTOR_DST_ALPHA:
- blendFactor.x = pixel.w;
- blendFactor.y = pixel.w;
- blendFactor.z = pixel.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- blendFactor.x = Short4(0xFFFFu) - pixel.w;
- blendFactor.y = Short4(0xFFFFu) - pixel.w;
- blendFactor.z = Short4(0xFFFFu) - pixel.w;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- blendFactor.x = Short4(0xFFFFu) - pixel.w;
- blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
- blendFactor.y = blendFactor.x;
- blendFactor.z = blendFactor.x;
- break;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
- blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
- blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
- blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
- blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
- break;
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
- blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
- blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
- blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
- blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
- break;
- default:
- UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
+ case VK_BLEND_FACTOR_ZERO:
+ // Optimized
+ break;
+ case VK_BLEND_FACTOR_ONE:
+ // Optimized
+ break;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ blendFactor.x = current.x;
+ blendFactor.y = current.y;
+ blendFactor.z = current.z;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ blendFactor.x = Short4(0xFFFFu) - current.x;
+ blendFactor.y = Short4(0xFFFFu) - current.y;
+ blendFactor.z = Short4(0xFFFFu) - current.z;
+ break;
+ case VK_BLEND_FACTOR_DST_COLOR:
+ blendFactor.x = pixel.x;
+ blendFactor.y = pixel.y;
+ blendFactor.z = pixel.z;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ blendFactor.x = Short4(0xFFFFu) - pixel.x;
+ blendFactor.y = Short4(0xFFFFu) - pixel.y;
+ blendFactor.z = Short4(0xFFFFu) - pixel.z;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ blendFactor.x = current.w;
+ blendFactor.y = current.w;
+ blendFactor.z = current.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ blendFactor.x = Short4(0xFFFFu) - current.w;
+ blendFactor.y = Short4(0xFFFFu) - current.w;
+ blendFactor.z = Short4(0xFFFFu) - current.w;
+ break;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ blendFactor.x = pixel.w;
+ blendFactor.y = pixel.w;
+ blendFactor.z = pixel.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ blendFactor.x = Short4(0xFFFFu) - pixel.w;
+ blendFactor.y = Short4(0xFFFFu) - pixel.w;
+ blendFactor.z = Short4(0xFFFFu) - pixel.w;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ blendFactor.x = Short4(0xFFFFu) - pixel.w;
+ blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
+ blendFactor.y = blendFactor.x;
+ blendFactor.z = blendFactor.x;
+ break;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[0]));
+ blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[1]));
+ blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[2]));
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[0]));
+ blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[1]));
+ blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[2]));
+ break;
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+ blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+ blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+ blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+ blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
}
}
@@ -869,49 +869,49 @@
{
switch(blendFactorAlphaActive)
{
- case VK_BLEND_FACTOR_ZERO:
- // Optimized
- break;
- case VK_BLEND_FACTOR_ONE:
- // Optimized
- break;
- case VK_BLEND_FACTOR_SRC_COLOR:
- blendFactor.w = current.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- blendFactor.w = Short4(0xFFFFu) - current.w;
- break;
- case VK_BLEND_FACTOR_DST_COLOR:
- blendFactor.w = pixel.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- blendFactor.w = Short4(0xFFFFu) - pixel.w;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- blendFactor.w = current.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- blendFactor.w = Short4(0xFFFFu) - current.w;
- break;
- case VK_BLEND_FACTOR_DST_ALPHA:
- blendFactor.w = pixel.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- blendFactor.w = Short4(0xFFFFu) - pixel.w;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- blendFactor.w = Short4(0xFFFFu);
- break;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
- break;
- default:
- UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
+ case VK_BLEND_FACTOR_ZERO:
+ // Optimized
+ break;
+ case VK_BLEND_FACTOR_ONE:
+ // Optimized
+ break;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ blendFactor.w = current.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ blendFactor.w = Short4(0xFFFFu) - current.w;
+ break;
+ case VK_BLEND_FACTOR_DST_COLOR:
+ blendFactor.w = pixel.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ blendFactor.w = Short4(0xFFFFu) - pixel.w;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ blendFactor.w = current.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ blendFactor.w = Short4(0xFFFFu) - current.w;
+ break;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ blendFactor.w = pixel.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ blendFactor.w = Short4(0xFFFFu) - pixel.w;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ blendFactor.w = Short4(0xFFFFu);
+ break;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
}
}
@@ -929,139 +929,140 @@
switch(state.targetFormat[index])
{
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- buffer += 2 * x;
- buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
- pixel.x = (c01 & Short4(0x7C00u)) << 1;
- pixel.y = (c01 & Short4(0x03E0u)) << 6;
- pixel.z = (c01 & Short4(0x001Fu)) << 11;
- pixel.w = (c01 & Short4(0x8000u)) >> 15;
+ pixel.x = (c01 & Short4(0x7C00u)) << 1;
+ pixel.y = (c01 & Short4(0x03E0u)) << 6;
+ pixel.z = (c01 & Short4(0x001Fu)) << 11;
+ pixel.w = (c01 & Short4(0x8000u)) >> 15;
- // Expand to 16 bit range
- pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
- pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
- pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
- pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
- pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
- pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
- break;
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- buffer += 2 * x;
- buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+ break;
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
- pixel.x = c01 & Short4(0xF800u);
- pixel.y = (c01 & Short4(0x07E0u)) << 5;
- pixel.z = (c01 & Short4(0x001Fu)) << 11;
- pixel.w = Short4(0xFFFFu);
+ pixel.x = c01 & Short4(0xF800u);
+ pixel.y = (c01 & Short4(0x07E0u)) << 5;
+ pixel.z = (c01 & Short4(0x001Fu)) << 11;
+ pixel.w = Short4(0xFFFFu);
- // Expand to 16 bit range
- pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
- pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
- pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
- pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
- pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
- pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
- break;
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
- buffer += 4 * x;
- c01 = *Pointer<Short4>(buffer);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- c23 = *Pointer<Short4>(buffer);
- pixel.z = c01;
- pixel.y = c01;
- pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
- pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
- pixel.x = pixel.z;
- pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
- pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
- pixel.y = pixel.z;
- pixel.w = pixel.x;
- pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
- pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
- pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
- pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
- break;
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- buffer += 4 * x;
- c01 = *Pointer<Short4>(buffer);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- c23 = *Pointer<Short4>(buffer);
- pixel.z = c01;
- pixel.y = c01;
- pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
- pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
- pixel.x = pixel.z;
- pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
- pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
- pixel.y = pixel.z;
- pixel.w = pixel.x;
- pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
- pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
- pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
- pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
- break;
- case VK_FORMAT_R8_UNORM:
- buffer += 1 * x;
- pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
- pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
- pixel.y = Short4(0x0000);
- pixel.z = Short4(0x0000);
- pixel.w = Short4(0xFFFFu);
- break;
- case VK_FORMAT_R8G8_UNORM:
- buffer += 2 * x;
- c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
- pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
- pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
- pixel.z = Short4(0x0000u);
- pixel.w = Short4(0xFFFFu);
- break;
- case VK_FORMAT_R16G16B16A16_UNORM:
- pixel.x = *Pointer<Short4>(buffer + 8 * x);
- pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.z = *Pointer<Short4>(buffer + 8 * x);
- pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
- transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
- break;
- case VK_FORMAT_R16G16_UNORM:
- pixel.x = *Pointer<Short4>(buffer + 4 * x);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.y = *Pointer<Short4>(buffer + 4 * x);
- pixel.z = pixel.x;
- pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
- pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
- pixel.y = pixel.z;
- pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
- pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
- pixel.z = Short4(0xFFFFu);
- pixel.w = Short4(0xFFFFu);
- break;
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- {
- Int4 v = Int4(0);
- v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
- v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
- v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+ break;
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ buffer += 4 * x;
+ c01 = *Pointer<Short4>(buffer);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ c23 = *Pointer<Short4>(buffer);
+ pixel.z = c01;
+ pixel.y = c01;
+ pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
+ pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
+ pixel.x = pixel.z;
+ pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
+ pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
+ pixel.y = pixel.z;
+ pixel.w = pixel.x;
+ pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
+ pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
+ pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
+ pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
+ break;
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ buffer += 4 * x;
+ c01 = *Pointer<Short4>(buffer);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ c23 = *Pointer<Short4>(buffer);
+ pixel.z = c01;
+ pixel.y = c01;
+ pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
+ pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
+ pixel.x = pixel.z;
+ pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
+ pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
+ pixel.y = pixel.z;
+ pixel.w = pixel.x;
+ pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
+ pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
+ pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
+ pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
+ break;
+ case VK_FORMAT_R8_UNORM:
+ buffer += 1 * x;
+ pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
+ pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
+ pixel.y = Short4(0x0000);
+ pixel.z = Short4(0x0000);
+ pixel.w = Short4(0xFFFFu);
+ break;
+ case VK_FORMAT_R8G8_UNORM:
+ buffer += 2 * x;
+ c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
+ pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
+ pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
+ pixel.z = Short4(0x0000u);
+ pixel.w = Short4(0xFFFFu);
+ break;
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ pixel.x = *Pointer<Short4>(buffer + 8 * x);
+ pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.z = *Pointer<Short4>(buffer + 8 * x);
+ pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
+ transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+ break;
+ case VK_FORMAT_R16G16_UNORM:
+ pixel.x = *Pointer<Short4>(buffer + 4 * x);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.y = *Pointer<Short4>(buffer + 4 * x);
+ pixel.z = pixel.x;
+ pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
+ pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
+ pixel.y = pixel.z;
+ pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
+ pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
+ pixel.z = Short4(0xFFFFu);
+ pixel.w = Short4(0xFFFFu);
+ break;
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ {
+ Int4 v = Int4(0);
+ v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
+ v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
+ v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
- pixel.x = Short4(v << 6) & Short4(0xFFC0u);
- pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
- pixel.z = Short4(v >> 14) & Short4(0xFFC0u);
- pixel.w = Short4(v >> 16) & Short4(0xC000u);
- } break;
- default:
- UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]);
+ pixel.x = Short4(v << 6) & Short4(0xFFC0u);
+ pixel.y = Short4(v >> 4) & Short4(0xFFC0u);
+ pixel.z = Short4(v >> 14) & Short4(0xFFC0u);
+ pixel.w = Short4(v >> 16) & Short4(0xC000u);
+ }
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]);
}
if(isSRGB(index))
@@ -1103,46 +1104,46 @@
switch(state.blendState[index].blendOperation)
{
- case VK_BLEND_OP_ADD:
- current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
- current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
- current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
- break;
- case VK_BLEND_OP_SUBTRACT:
- current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
- current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
- current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
- break;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
- current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
- current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
- break;
- case VK_BLEND_OP_MIN:
- current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
- current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
- current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
- break;
- case VK_BLEND_OP_MAX:
- current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
- current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
- current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
- break;
- case VK_BLEND_OP_SRC_EXT:
- // No operation
- break;
- case VK_BLEND_OP_DST_EXT:
- current.x = pixel.x;
- current.y = pixel.y;
- current.z = pixel.z;
- break;
- case VK_BLEND_OP_ZERO_EXT:
- current.x = Short4(0x0000);
- current.y = Short4(0x0000);
- current.z = Short4(0x0000);
- break;
- default:
- UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
+ case VK_BLEND_OP_ADD:
+ current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
+ current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
+ current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
+ break;
+ case VK_BLEND_OP_SUBTRACT:
+ current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
+ current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
+ current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
+ break;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
+ current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
+ current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
+ break;
+ case VK_BLEND_OP_MIN:
+ current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
+ current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
+ current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
+ break;
+ case VK_BLEND_OP_MAX:
+ current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
+ current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
+ current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
+ break;
+ case VK_BLEND_OP_SRC_EXT:
+ // No operation
+ break;
+ case VK_BLEND_OP_DST_EXT:
+ current.x = pixel.x;
+ current.y = pixel.y;
+ current.z = pixel.z;
+ break;
+ case VK_BLEND_OP_ZERO_EXT:
+ current.x = Short4(0x0000);
+ current.y = Short4(0x0000);
+ current.z = Short4(0x0000);
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
}
blendFactorAlpha(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactorAlpha);
@@ -1160,32 +1161,32 @@
switch(state.blendState[index].blendOperationAlpha)
{
- case VK_BLEND_OP_ADD:
- current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
- break;
- case VK_BLEND_OP_SUBTRACT:
- current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
- break;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
- break;
- case VK_BLEND_OP_MIN:
- current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
- break;
- case VK_BLEND_OP_MAX:
- current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
- break;
- case VK_BLEND_OP_SRC_EXT:
- // No operation
- break;
- case VK_BLEND_OP_DST_EXT:
- current.w = pixel.w;
- break;
- case VK_BLEND_OP_ZERO_EXT:
- current.w = Short4(0x0000);
- break;
- default:
- UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
+ case VK_BLEND_OP_ADD:
+ current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
+ break;
+ case VK_BLEND_OP_SUBTRACT:
+ current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
+ break;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
+ break;
+ case VK_BLEND_OP_MIN:
+ current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
+ break;
+ case VK_BLEND_OP_MAX:
+ current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
+ break;
+ case VK_BLEND_OP_SRC_EXT:
+ // No operation
+ break;
+ case VK_BLEND_OP_DST_EXT:
+ current.w = pixel.w;
+ break;
+ case VK_BLEND_OP_ZERO_EXT:
+ current.w = Short4(0x0000);
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
}
}
@@ -1198,21 +1199,21 @@
switch(state.targetFormat[index])
{
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
- current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
- current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
- current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
- break;
- default:
- break;
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
+ current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
+ break;
+ default:
+ break;
}
int rgbaWriteMask = state.colorWriteActive(index);
@@ -1220,7 +1221,7 @@
switch(state.targetFormat[index])
{
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
{
current.w = current.w & Short4(0x8000u);
current.x = As<UShort4>(current.x & Short4(0xF800)) >> 1;
@@ -1230,7 +1231,7 @@
current.x = current.x | current.y | current.z | current.w;
}
break;
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
{
current.x = current.x & Short4(0xF800u);
current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
@@ -1239,121 +1240,121 @@
current.x = current.x | current.y | current.z;
}
break;
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
- if(rgbaWriteMask == 0x7)
- {
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ if(rgbaWriteMask == 0x7)
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ else
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+ current.w = As<Short4>(As<UShort4>(current.w) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.w));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ break;
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ if(rgbaWriteMask == 0x7)
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.x, current.z));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ else
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+ current.w = As<Short4>(As<UShort4>(current.w) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.x, current.z));
+ current.y = As<Short4>(PackUnsigned(current.y, current.w));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ break;
+ case VK_FORMAT_R8G8_UNORM:
current.x = As<Short4>(As<UShort4>(current.x) >> 8);
current.y = As<Short4>(As<UShort4>(current.y) >> 8);
- current.z = As<Short4>(As<UShort4>(current.z) >> 8);
-
- current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.x = As<Short4>(PackUnsigned(current.x, current.x));
current.y = As<Short4>(PackUnsigned(current.y, current.y));
-
- current.x = current.z;
- current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
- current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
- current.y = current.z;
- current.z = As<Short4>(UnpackLow(current.z, current.x));
- current.y = As<Short4>(UnpackHigh(current.y, current.x));
- }
- else
- {
+ current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
+ break;
+ case VK_FORMAT_R8_UNORM:
current.x = As<Short4>(As<UShort4>(current.x) >> 8);
- current.y = As<Short4>(As<UShort4>(current.y) >> 8);
- current.z = As<Short4>(As<UShort4>(current.z) >> 8);
- current.w = As<Short4>(As<UShort4>(current.w) >> 8);
-
- current.z = As<Short4>(PackUnsigned(current.z, current.x));
- current.y = As<Short4>(PackUnsigned(current.y, current.w));
-
- current.x = current.z;
- current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
- current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.x = As<Short4>(PackUnsigned(current.x, current.x));
+ break;
+ case VK_FORMAT_R16G16_UNORM:
+ current.z = current.x;
+ current.x = As<Short4>(UnpackLow(current.x, current.y));
+ current.z = As<Short4>(UnpackHigh(current.z, current.y));
current.y = current.z;
- current.z = As<Short4>(UnpackLow(current.z, current.x));
- current.y = As<Short4>(UnpackHigh(current.y, current.x));
- }
- break;
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- if(rgbaWriteMask == 0x7)
+ break;
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ transpose4x4(current.x, current.y, current.z, current.w);
+ break;
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
{
- current.x = As<Short4>(As<UShort4>(current.x) >> 8);
- current.y = As<Short4>(As<UShort4>(current.y) >> 8);
- current.z = As<Short4>(As<UShort4>(current.z) >> 8);
-
- current.z = As<Short4>(PackUnsigned(current.x, current.z));
- current.y = As<Short4>(PackUnsigned(current.y, current.y));
-
- current.x = current.z;
- current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
- current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
- current.y = current.z;
- current.z = As<Short4>(UnpackLow(current.z, current.x));
- current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
+ auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
+ auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
+ auto a = (Int4(current.w) >> 14) & Int4(0x3);
+ Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
+ auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
+ auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
+ current.x = UnpackLow(c02, c13);
+ current.y = UnpackHigh(c02, c13);
+ break;
}
- else
- {
- current.x = As<Short4>(As<UShort4>(current.x) >> 8);
- current.y = As<Short4>(As<UShort4>(current.y) >> 8);
- current.z = As<Short4>(As<UShort4>(current.z) >> 8);
- current.w = As<Short4>(As<UShort4>(current.w) >> 8);
-
- current.z = As<Short4>(PackUnsigned(current.x, current.z));
- current.y = As<Short4>(PackUnsigned(current.y, current.w));
-
- current.x = current.z;
- current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
- current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
- current.y = current.z;
- current.z = As<Short4>(UnpackLow(current.z, current.x));
- current.y = As<Short4>(UnpackHigh(current.y, current.x));
- }
- break;
- case VK_FORMAT_R8G8_UNORM:
- current.x = As<Short4>(As<UShort4>(current.x) >> 8);
- current.y = As<Short4>(As<UShort4>(current.y) >> 8);
- current.x = As<Short4>(PackUnsigned(current.x, current.x));
- current.y = As<Short4>(PackUnsigned(current.y, current.y));
- current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
- break;
- case VK_FORMAT_R8_UNORM:
- current.x = As<Short4>(As<UShort4>(current.x) >> 8);
- current.x = As<Short4>(PackUnsigned(current.x, current.x));
- break;
- case VK_FORMAT_R16G16_UNORM:
- current.z = current.x;
- current.x = As<Short4>(UnpackLow(current.x, current.y));
- current.z = As<Short4>(UnpackHigh(current.z, current.y));
- current.y = current.z;
- break;
- case VK_FORMAT_R16G16B16A16_UNORM:
- transpose4x4(current.x, current.y, current.z, current.w);
- break;
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- {
- auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
- auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
- auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
- auto a = (Int4(current.w) >> 14) & Int4(0x3);
- Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
- auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
- auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
- current.x = UnpackLow(c02, c13);
- current.y = UnpackHigh(c02, c13);
- break;
- }
- default:
- UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
}
Short4 c01 = current.z;
Short4 c23 = current.y;
- Int xMask; // Combination of all masks
+ Int xMask; // Combination of all masks
if(state.depthTestActive)
{
@@ -1373,26 +1374,26 @@
switch(state.targetFormat[index])
{
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
{
buffer += 2 * x;
Int value = *Pointer<Int>(buffer);
- Int channelMask = *Pointer<Int>(constants + OFFSET(Constants,mask5551Q[bgraWriteMask & 0xF][0]));
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask5551Q[bgraWriteMask & 0xF][0]));
Int c01 = Extract(As<Int2>(current.x), 0);
- Int mask01 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
if(bgraWriteMask != 0x0000000F)
{
mask01 &= channelMask;
}
*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
value = *Pointer<Int>(buffer);
Int c23 = Extract(As<Int2>(current.x), 1);
- Int mask23 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
if(bgraWriteMask != 0x0000000F)
{
mask23 &= channelMask;
@@ -1400,26 +1401,26 @@
*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
}
break;
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
{
buffer += 2 * x;
Int value = *Pointer<Int>(buffer);
- Int channelMask = *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask565Q[bgraWriteMask & 0x7][0]));
Int c01 = Extract(As<Int2>(current.x), 0);
- Int mask01 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
if((bgraWriteMask & 0x00000007) != 0x00000007)
{
mask01 &= channelMask;
}
*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
value = *Pointer<Int>(buffer);
Int c23 = Extract(As<Int2>(current.x), 1);
- Int mask23 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
if((bgraWriteMask & 0x00000007) != 0x00000007)
{
mask23 &= channelMask;
@@ -1427,24 +1428,24 @@
*Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
}
break;
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
{
buffer += x * 4;
Short4 value = *Pointer<Short4>(buffer);
- Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
+ Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[bgraWriteMask][0]));
- Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
+ Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
if(bgraWriteMask != 0x0000000F)
{
mask01 &= channelMask;
}
*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
value = *Pointer<Short4>(buffer);
- Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
+ Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
if(bgraWriteMask != 0x0000000F)
{
mask23 &= channelMask;
@@ -1452,26 +1453,26 @@
*Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
}
break;
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
{
buffer += x * 4;
Short4 value = *Pointer<Short4>(buffer);
- Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
+ Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
- Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
+ Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
if(rgbaWriteMask != 0x0000000F)
{
mask01 &= channelMask;
}
*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
value = *Pointer<Short4>(buffer);
- Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
+ Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
if(rgbaWriteMask != 0x0000000F)
{
mask23 &= channelMask;
@@ -1479,49 +1480,49 @@
*Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
}
break;
- case VK_FORMAT_R8G8_UNORM:
- if((rgbaWriteMask & 0x00000003) != 0x0)
- {
- buffer += 2 * x;
- Int2 value;
- value = Insert(value, *Pointer<Int>(buffer), 0);
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
-
- Int2 packedCol = As<Int2>(current.x);
-
- UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
- if((rgbaWriteMask & 0x3) != 0x3)
+ case VK_FORMAT_R8G8_UNORM:
+ if((rgbaWriteMask & 0x00000003) != 0x0)
{
- Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
- UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
- mergedMask &= rgbaMask;
+ buffer += 2 * x;
+ Int2 value;
+ value = Insert(value, *Pointer<Int>(buffer), 0);
+ Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
+
+ Int2 packedCol = As<Int2>(current.x);
+
+ UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
+ if((rgbaWriteMask & 0x3) != 0x3)
+ {
+ Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
+ UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+ mergedMask &= rgbaMask;
+ }
+
+ packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
+
+ *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
+ *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
}
+ break;
+ case VK_FORMAT_R8_UNORM:
+ if(rgbaWriteMask & 0x00000001)
+ {
+ buffer += 1 * x;
+ Short4 value;
+ value = Insert(value, *Pointer<Short>(buffer), 0);
+ Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
- packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
+ current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
+ current.x |= value;
- *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
- *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
- }
- break;
- case VK_FORMAT_R8_UNORM:
- if(rgbaWriteMask & 0x00000001)
- {
- buffer += 1 * x;
- Short4 value;
- value = Insert(value, *Pointer<Short>(buffer), 0);
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
-
- current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
- value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
- current.x |= value;
-
- *Pointer<Short>(buffer) = Extract(current.x, 0);
- *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
- }
- break;
- case VK_FORMAT_R16G16_UNORM:
+ *Pointer<Short>(buffer) = Extract(current.x, 0);
+ *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
+ }
+ break;
+ case VK_FORMAT_R16G16_UNORM:
{
buffer += 4 * x;
@@ -1530,35 +1531,35 @@
if((rgbaWriteMask & 0x00000003) != 0x00000003)
{
Short4 masked = value;
- current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
- masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
+ current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0]));
+ masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0]));
current.x |= masked;
}
- current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
- value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
+ current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD01Q) + xMask * 8);
current.x |= value;
*Pointer<Short4>(buffer) = current.x;
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
value = *Pointer<Short4>(buffer);
if((rgbaWriteMask & 0x00000003) != 0x00000003)
{
Short4 masked = value;
- current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
- masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
+ current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0]));
+ masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0]));
current.y |= masked;
}
- current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
- value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
+ current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD23Q) + xMask * 8);
current.y |= value;
*Pointer<Short4>(buffer) = current.y;
}
break;
- case VK_FORMAT_R16G16B16A16_UNORM:
+ case VK_FORMAT_R16G16B16A16_UNORM:
{
buffer += 8 * x;
@@ -1568,13 +1569,13 @@
if(rgbaWriteMask != 0x0000000F)
{
Short4 masked = value;
- current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
- masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+ current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+ masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
current.x |= masked;
}
- current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
- value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
+ current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ0Q) + xMask * 8);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ0Q) + xMask * 8);
current.x |= value;
*Pointer<Short4>(buffer) = current.x;
}
@@ -1585,18 +1586,18 @@
if(rgbaWriteMask != 0x0000000F)
{
Short4 masked = value;
- current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
- masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+ current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+ masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
current.y |= masked;
}
- current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
- value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
+ current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ1Q) + xMask * 8);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ1Q) + xMask * 8);
current.y |= value;
*Pointer<Short4>(buffer + 8) = current.y;
}
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
{
Short4 value = *Pointer<Short4>(buffer);
@@ -1604,13 +1605,13 @@
if(rgbaWriteMask != 0x0000000F)
{
Short4 masked = value;
- current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
- masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+ current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+ masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
current.z |= masked;
}
- current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
- value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
+ current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ2Q) + xMask * 8);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ2Q) + xMask * 8);
current.z |= value;
*Pointer<Short4>(buffer) = current.z;
}
@@ -1621,13 +1622,13 @@
if(rgbaWriteMask != 0x0000000F)
{
Short4 masked = value;
- current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
- masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
+ current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+ masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
current.w |= masked;
}
- current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
- value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
+ current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ3Q) + xMask * 8);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ3Q) + xMask * 8);
current.w |= value;
*Pointer<Short4>(buffer + 8) = current.w;
}
@@ -1656,8 +1657,8 @@
*Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
}
break;
- default:
- UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
}
}
@@ -1665,85 +1666,85 @@
{
switch(blendFactorActive)
{
- case VK_BLEND_FACTOR_ZERO:
- blendFactor.x = Float4(0);
- blendFactor.y = Float4(0);
- blendFactor.z = Float4(0);
- break;
- case VK_BLEND_FACTOR_ONE:
- blendFactor.x = Float4(1);
- blendFactor.y = Float4(1);
- blendFactor.z = Float4(1);
- break;
- case VK_BLEND_FACTOR_SRC_COLOR:
- blendFactor.x = oC.x;
- blendFactor.y = oC.y;
- blendFactor.z = oC.z;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- blendFactor.x = Float4(1.0f) - oC.x;
- blendFactor.y = Float4(1.0f) - oC.y;
- blendFactor.z = Float4(1.0f) - oC.z;
- break;
- case VK_BLEND_FACTOR_DST_COLOR:
- blendFactor.x = pixel.x;
- blendFactor.y = pixel.y;
- blendFactor.z = pixel.z;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- blendFactor.x = Float4(1.0f) - pixel.x;
- blendFactor.y = Float4(1.0f) - pixel.y;
- blendFactor.z = Float4(1.0f) - pixel.z;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- blendFactor.x = oC.w;
- blendFactor.y = oC.w;
- blendFactor.z = oC.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- blendFactor.x = Float4(1.0f) - oC.w;
- blendFactor.y = Float4(1.0f) - oC.w;
- blendFactor.z = Float4(1.0f) - oC.w;
- break;
- case VK_BLEND_FACTOR_DST_ALPHA:
- blendFactor.x = pixel.w;
- blendFactor.y = pixel.w;
- blendFactor.z = pixel.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- blendFactor.x = Float4(1.0f) - pixel.w;
- blendFactor.y = Float4(1.0f) - pixel.w;
- blendFactor.z = Float4(1.0f) - pixel.w;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- blendFactor.x = Float4(1.0f) - pixel.w;
- blendFactor.x = Min(blendFactor.x, oC.w);
- blendFactor.y = blendFactor.x;
- blendFactor.z = blendFactor.x;
- break;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
- blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
- blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
- break;
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
- blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
- blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
- blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
- blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
- blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
- blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
- break;
+ case VK_BLEND_FACTOR_ZERO:
+ blendFactor.x = Float4(0);
+ blendFactor.y = Float4(0);
+ blendFactor.z = Float4(0);
+ break;
+ case VK_BLEND_FACTOR_ONE:
+ blendFactor.x = Float4(1);
+ blendFactor.y = Float4(1);
+ blendFactor.z = Float4(1);
+ break;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ blendFactor.x = oC.x;
+ blendFactor.y = oC.y;
+ blendFactor.z = oC.z;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ blendFactor.x = Float4(1.0f) - oC.x;
+ blendFactor.y = Float4(1.0f) - oC.y;
+ blendFactor.z = Float4(1.0f) - oC.z;
+ break;
+ case VK_BLEND_FACTOR_DST_COLOR:
+ blendFactor.x = pixel.x;
+ blendFactor.y = pixel.y;
+ blendFactor.z = pixel.z;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ blendFactor.x = Float4(1.0f) - pixel.x;
+ blendFactor.y = Float4(1.0f) - pixel.y;
+ blendFactor.z = Float4(1.0f) - pixel.z;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ blendFactor.x = oC.w;
+ blendFactor.y = oC.w;
+ blendFactor.z = oC.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ blendFactor.x = Float4(1.0f) - oC.w;
+ blendFactor.y = Float4(1.0f) - oC.w;
+ blendFactor.z = Float4(1.0f) - oC.w;
+ break;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ blendFactor.x = pixel.w;
+ blendFactor.y = pixel.w;
+ blendFactor.z = pixel.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ blendFactor.x = Float4(1.0f) - pixel.w;
+ blendFactor.y = Float4(1.0f) - pixel.w;
+ blendFactor.z = Float4(1.0f) - pixel.w;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ blendFactor.x = Float4(1.0f) - pixel.w;
+ blendFactor.x = Min(blendFactor.x, oC.w);
+ blendFactor.y = blendFactor.x;
+ blendFactor.z = blendFactor.x;
+ break;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[0]));
+ blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[1]));
+ blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[2]));
+ break;
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+ blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+ blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[0]));
+ blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[1]));
+ blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[2]));
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+ blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+ blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+ break;
- default:
- UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
+ default:
+ UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
}
}
@@ -1751,49 +1752,49 @@
{
switch(blendFactorAlphaActive)
{
- case VK_BLEND_FACTOR_ZERO:
- blendFactor.w = Float4(0);
- break;
- case VK_BLEND_FACTOR_ONE:
- blendFactor.w = Float4(1);
- break;
- case VK_BLEND_FACTOR_SRC_COLOR:
- blendFactor.w = oC.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- blendFactor.w = Float4(1.0f) - oC.w;
- break;
- case VK_BLEND_FACTOR_DST_COLOR:
- blendFactor.w = pixel.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- blendFactor.w = Float4(1.0f) - pixel.w;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- blendFactor.w = oC.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- blendFactor.w = Float4(1.0f) - oC.w;
- break;
- case VK_BLEND_FACTOR_DST_ALPHA:
- blendFactor.w = pixel.w;
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- blendFactor.w = Float4(1.0f) - pixel.w;
- break;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- blendFactor.w = Float4(1.0f);
- break;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
- break;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
- break;
- default:
- UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
+ case VK_BLEND_FACTOR_ZERO:
+ blendFactor.w = Float4(0);
+ break;
+ case VK_BLEND_FACTOR_ONE:
+ blendFactor.w = Float4(1);
+ break;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ blendFactor.w = oC.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ blendFactor.w = Float4(1.0f) - oC.w;
+ break;
+ case VK_BLEND_FACTOR_DST_COLOR:
+ blendFactor.w = pixel.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ blendFactor.w = Float4(1.0f) - pixel.w;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ blendFactor.w = oC.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ blendFactor.w = Float4(1.0f) - oC.w;
+ break;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ blendFactor.w = pixel.w;
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ blendFactor.w = Float4(1.0f) - pixel.w;
+ break;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ blendFactor.w = Float4(1.0f);
+ break;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
+ break;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
}
}
@@ -1829,89 +1830,89 @@
switch(state.targetFormat[index])
{
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32_UINT:
- case VK_FORMAT_R32_SFLOAT:
- // FIXME: movlps
- pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
- pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- // FIXME: movhps
- pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
- pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
- pixel.y = pixel.z = pixel.w = one;
- break;
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32_UINT:
- case VK_FORMAT_R32G32_SFLOAT:
- pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
- pixel.z = pixel.x;
- pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202);
- pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313);
- pixel.y = pixel.z;
- pixel.z = pixel.w = one;
- break;
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
- pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
- pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
- transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
- break;
- case VK_FORMAT_R16_SFLOAT:
- pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
- pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
- pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
- pixel.y = pixel.z = pixel.w = one;
- break;
- case VK_FORMAT_R16G16_SFLOAT:
- pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
- pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
- pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
- pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
- pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
- pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
- pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
- pixel.z = pixel.w = one;
- break;
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
- pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
- pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
- pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
- pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
- pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
- pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
- pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
- pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
- pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
- pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
- pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
- pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
- pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
- pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
- break;
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
- pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
- pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
- pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
- transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
- break;
- default:
- UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32_UINT:
+ case VK_FORMAT_R32_SFLOAT:
+ // FIXME: movlps
+ pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
+ pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ // FIXME: movhps
+ pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
+ pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
+ pixel.y = pixel.z = pixel.w = one;
+ break;
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32_UINT:
+ case VK_FORMAT_R32G32_SFLOAT:
+ pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
+ pixel.z = pixel.x;
+ pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202);
+ pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313);
+ pixel.y = pixel.z;
+ pixel.z = pixel.w = one;
+ break;
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
+ pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
+ pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
+ transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+ break;
+ case VK_FORMAT_R16_SFLOAT:
+ pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
+ pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
+ pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
+ pixel.y = pixel.z = pixel.w = one;
+ break;
+ case VK_FORMAT_R16G16_SFLOAT:
+ pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
+ pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
+ pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
+ pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
+ pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
+ pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
+ pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
+ pixel.z = pixel.w = one;
+ break;
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
+ pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
+ pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
+ pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
+ pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
+ pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
+ pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
+ pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
+ pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
+ pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
+ pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
+ pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
+ pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
+ pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
+ pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
+ break;
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
+ pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
+ pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
+ transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
}
// Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
@@ -1931,46 +1932,46 @@
switch(state.blendState[index].blendOperation)
{
- case VK_BLEND_OP_ADD:
- oC.x += pixel.x;
- oC.y += pixel.y;
- oC.z += pixel.z;
- break;
- case VK_BLEND_OP_SUBTRACT:
- oC.x -= pixel.x;
- oC.y -= pixel.y;
- oC.z -= pixel.z;
- break;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- oC.x = pixel.x - oC.x;
- oC.y = pixel.y - oC.y;
- oC.z = pixel.z - oC.z;
- break;
- case VK_BLEND_OP_MIN:
- oC.x = Min(oC.x, pixel.x);
- oC.y = Min(oC.y, pixel.y);
- oC.z = Min(oC.z, pixel.z);
- break;
- case VK_BLEND_OP_MAX:
- oC.x = Max(oC.x, pixel.x);
- oC.y = Max(oC.y, pixel.y);
- oC.z = Max(oC.z, pixel.z);
- break;
- case VK_BLEND_OP_SRC_EXT:
- // No operation
- break;
- case VK_BLEND_OP_DST_EXT:
- oC.x = pixel.x;
- oC.y = pixel.y;
- oC.z = pixel.z;
- break;
- case VK_BLEND_OP_ZERO_EXT:
- oC.x = Float4(0.0f);
- oC.y = Float4(0.0f);
- oC.z = Float4(0.0f);
- break;
- default:
- UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
+ case VK_BLEND_OP_ADD:
+ oC.x += pixel.x;
+ oC.y += pixel.y;
+ oC.z += pixel.z;
+ break;
+ case VK_BLEND_OP_SUBTRACT:
+ oC.x -= pixel.x;
+ oC.y -= pixel.y;
+ oC.z -= pixel.z;
+ break;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ oC.x = pixel.x - oC.x;
+ oC.y = pixel.y - oC.y;
+ oC.z = pixel.z - oC.z;
+ break;
+ case VK_BLEND_OP_MIN:
+ oC.x = Min(oC.x, pixel.x);
+ oC.y = Min(oC.y, pixel.y);
+ oC.z = Min(oC.z, pixel.z);
+ break;
+ case VK_BLEND_OP_MAX:
+ oC.x = Max(oC.x, pixel.x);
+ oC.y = Max(oC.y, pixel.y);
+ oC.z = Max(oC.z, pixel.z);
+ break;
+ case VK_BLEND_OP_SRC_EXT:
+ // No operation
+ break;
+ case VK_BLEND_OP_DST_EXT:
+ oC.x = pixel.x;
+ oC.y = pixel.y;
+ oC.z = pixel.z;
+ break;
+ case VK_BLEND_OP_ZERO_EXT:
+ oC.x = Float4(0.0f);
+ oC.y = Float4(0.0f);
+ oC.z = Float4(0.0f);
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
}
blendFactorAlpha(sourceFactor, oC, pixel, state.blendState[index].sourceBlendFactorAlpha);
@@ -1981,33 +1982,33 @@
switch(state.blendState[index].blendOperationAlpha)
{
- case VK_BLEND_OP_ADD:
- oC.w += pixel.w;
- break;
- case VK_BLEND_OP_SUBTRACT:
- oC.w -= pixel.w;
- break;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- pixel.w -= oC.w;
- oC.w = pixel.w;
- break;
- case VK_BLEND_OP_MIN:
- oC.w = Min(oC.w, pixel.w);
- break;
- case VK_BLEND_OP_MAX:
- oC.w = Max(oC.w, pixel.w);
- break;
- case VK_BLEND_OP_SRC_EXT:
- // No operation
- break;
- case VK_BLEND_OP_DST_EXT:
- oC.w = pixel.w;
- break;
- case VK_BLEND_OP_ZERO_EXT:
- oC.w = Float4(0.0f);
- break;
- default:
- UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
+ case VK_BLEND_OP_ADD:
+ oC.w += pixel.w;
+ break;
+ case VK_BLEND_OP_SUBTRACT:
+ oC.w -= pixel.w;
+ break;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ pixel.w -= oC.w;
+ oC.w = pixel.w;
+ break;
+ case VK_BLEND_OP_MIN:
+ oC.w = Min(oC.w, pixel.w);
+ break;
+ case VK_BLEND_OP_MAX:
+ oC.w = Max(oC.w, pixel.w);
+ break;
+ case VK_BLEND_OP_SRC_EXT:
+ // No operation
+ break;
+ case VK_BLEND_OP_DST_EXT:
+ oC.w = pixel.w;
+ break;
+ case VK_BLEND_OP_ZERO_EXT:
+ oC.w = Float4(0.0f);
+ break;
+ default:
+ UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
}
}
@@ -2015,49 +2016,49 @@
{
switch(state.targetFormat[index])
{
- case VK_FORMAT_R16_SFLOAT:
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32_UINT:
- case VK_FORMAT_R16_SINT:
- case VK_FORMAT_R16_UINT:
- case VK_FORMAT_R8_SINT:
- case VK_FORMAT_R8_UINT:
- case VK_FORMAT_A2B10G10R10_UINT_PACK32:
- break;
- case VK_FORMAT_R16G16_SFLOAT:
- case VK_FORMAT_R32G32_SFLOAT:
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32_UINT:
- case VK_FORMAT_R16G16_SINT:
- case VK_FORMAT_R16G16_UINT:
- case VK_FORMAT_R8G8_SINT:
- case VK_FORMAT_R8G8_UINT:
- oC.z = oC.x;
- oC.x = UnpackLow(oC.x, oC.y);
- oC.z = UnpackHigh(oC.z, oC.y);
- oC.y = oC.z;
- break;
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- case VK_FORMAT_R16G16B16A16_SINT:
- case VK_FORMAT_R16G16B16A16_UINT:
- case VK_FORMAT_R8G8B8A8_SINT:
- case VK_FORMAT_R8G8B8A8_UINT:
- case VK_FORMAT_A8B8G8R8_UINT_PACK32:
- case VK_FORMAT_A8B8G8R8_SINT_PACK32:
- transpose4x4(oC.x, oC.y, oC.z, oC.w);
- break;
- default:
- UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+ case VK_FORMAT_R16_SFLOAT:
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32_UINT:
+ case VK_FORMAT_R16_SINT:
+ case VK_FORMAT_R16_UINT:
+ case VK_FORMAT_R8_SINT:
+ case VK_FORMAT_R8_UINT:
+ case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+ break;
+ case VK_FORMAT_R16G16_SFLOAT:
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32_UINT:
+ case VK_FORMAT_R16G16_SINT:
+ case VK_FORMAT_R16G16_UINT:
+ case VK_FORMAT_R8G8_SINT:
+ case VK_FORMAT_R8G8_UINT:
+ oC.z = oC.x;
+ oC.x = UnpackLow(oC.x, oC.y);
+ oC.z = UnpackHigh(oC.z, oC.y);
+ oC.y = oC.z;
+ break;
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ case VK_FORMAT_R16G16B16A16_SINT:
+ case VK_FORMAT_R16G16B16A16_UINT:
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_R8G8B8A8_UINT:
+ case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+ case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+ transpose4x4(oC.x, oC.y, oC.z, oC.w);
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
}
int rgbaWriteMask = state.colorWriteActive(index);
- Int xMask; // Combination of all masks
+ Int xMask; // Combination of all masks
if(state.depthTestActive)
{
@@ -2080,520 +2081,520 @@
switch(targetFormat)
{
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32_UINT:
- if(rgbaWriteMask & 0x00000001)
- {
- buffer += 4 * x;
-
- // FIXME: movlps
- value.x = *Pointer<Float>(buffer + 0);
- value.y = *Pointer<Float>(buffer + 4);
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
- // FIXME: movhps
- value.z = *Pointer<Float>(buffer + 0);
- value.w = *Pointer<Float>(buffer + 4);
-
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
- oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-
- // FIXME: movhps
- *Pointer<Float>(buffer + 0) = oC.x.z;
- *Pointer<Float>(buffer + 4) = oC.x.w;
-
- buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
- // FIXME: movlps
- *Pointer<Float>(buffer + 0) = oC.x.x;
- *Pointer<Float>(buffer + 4) = oC.x.y;
- }
- break;
- case VK_FORMAT_R16_SFLOAT:
- if(rgbaWriteMask & 0x00000001)
- {
- buffer += 2 * x;
-
- value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
- value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
- value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
- value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
-
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
- oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-
- *Pointer<Half>(buffer + 0) = Half(oC.x.z);
- *Pointer<Half>(buffer + 2) = Half(oC.x.w);
-
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
- *Pointer<Half>(buffer + 0) = Half(oC.x.x);
- *Pointer<Half>(buffer + 2) = Half(oC.x.y);
- }
- break;
- case VK_FORMAT_R16_SINT:
- case VK_FORMAT_R16_UINT:
- if(rgbaWriteMask & 0x00000001)
- {
- buffer += 2 * x;
-
- UShort4 xyzw;
- xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
- xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
- value = As<Float4>(Int4(xyzw));
-
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
- oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
-
- if(targetFormat == VK_FORMAT_R16_SINT)
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32_UINT:
+ if(rgbaWriteMask & 0x00000001)
{
- Float component = oC.x.z;
- *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
- component = oC.x.w;
- *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+ buffer += 4 * x;
+
+ // FIXME: movlps
+ value.x = *Pointer<Float>(buffer + 0);
+ value.y = *Pointer<Float>(buffer + 4);
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ // FIXME: movhps
+ value.z = *Pointer<Float>(buffer + 0);
+ value.w = *Pointer<Float>(buffer + 4);
+
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
+ oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+
+ // FIXME: movhps
+ *Pointer<Float>(buffer + 0) = oC.x.z;
+ *Pointer<Float>(buffer + 4) = oC.x.w;
buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- component = oC.x.x;
- *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
- component = oC.x.y;
- *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+ // FIXME: movlps
+ *Pointer<Float>(buffer + 0) = oC.x.x;
+ *Pointer<Float>(buffer + 4) = oC.x.y;
}
- else // VK_FORMAT_R16_UINT
+ break;
+ case VK_FORMAT_R16_SFLOAT:
+ if(rgbaWriteMask & 0x00000001)
{
- Float component = oC.x.z;
- *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
- component = oC.x.w;
- *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+ buffer += 2 * x;
+
+ value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
+ value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
+ value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
+
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
+ oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+
+ *Pointer<Half>(buffer + 0) = Half(oC.x.z);
+ *Pointer<Half>(buffer + 2) = Half(oC.x.w);
buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- component = oC.x.x;
- *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
- component = oC.x.y;
- *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+ *Pointer<Half>(buffer + 0) = Half(oC.x.x);
+ *Pointer<Half>(buffer + 2) = Half(oC.x.y);
}
- }
- break;
- case VK_FORMAT_R8_SINT:
- case VK_FORMAT_R8_UINT:
- if(rgbaWriteMask & 0x00000001)
- {
- buffer += x;
-
- UInt xyzw, packedCol;
-
- xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
-
- Short4 tmpCol = Short4(As<Int4>(oC.x));
- if(targetFormat == VK_FORMAT_R8_SINT)
+ break;
+ case VK_FORMAT_R16_SINT:
+ case VK_FORMAT_R16_UINT:
+ if(rgbaWriteMask & 0x00000001)
{
- tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
+ buffer += 2 * x;
+
+ UShort4 xyzw;
+ xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
+ value = As<Float4>(Int4(xyzw));
+
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
+ oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+
+ if(targetFormat == VK_FORMAT_R16_SINT)
+ {
+ Float component = oC.x.z;
+ *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
+ component = oC.x.w;
+ *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+
+ buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ component = oC.x.x;
+ *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
+ component = oC.x.y;
+ *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
+ }
+ else // VK_FORMAT_R16_UINT
+ {
+ Float component = oC.x.z;
+ *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
+ component = oC.x.w;
+ *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+
+ buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ component = oC.x.x;
+ *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
+ component = oC.x.y;
+ *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
+ }
}
- else
+ break;
+ case VK_FORMAT_R8_SINT:
+ case VK_FORMAT_R8_UINT:
+ if(rgbaWriteMask & 0x00000001)
{
- tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
+ buffer += x;
+
+ UInt xyzw, packedCol;
+
+ xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
+
+ Short4 tmpCol = Short4(As<Int4>(oC.x));
+ if(targetFormat == VK_FORMAT_R8_SINT)
+ {
+ tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
+ }
+ else
+ {
+ tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
+ }
+ packedCol = Extract(As<Int2>(tmpCol), 0);
+
+ packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
+ (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
+
+ *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
+ buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ *Pointer<UShort>(buffer) = UShort(packedCol);
}
- packedCol = Extract(As<Int2>(tmpCol), 0);
+ break;
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32_UINT:
+ buffer += 8 * x;
- packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
- (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
+ value = *Pointer<Float4>(buffer);
- *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- *Pointer<UShort>(buffer) = UShort(packedCol);
- }
- break;
- case VK_FORMAT_R32G32_SFLOAT:
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32_UINT:
- buffer += 8 * x;
-
- value = *Pointer<Float4>(buffer);
-
- if((rgbaWriteMask & 0x00000003) != 0x00000003)
- {
- Float4 masked = value;
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
- masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
- oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
- }
-
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
- oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
- *Pointer<Float4>(buffer) = oC.x;
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
- value = *Pointer<Float4>(buffer);
-
- if((rgbaWriteMask & 0x00000003) != 0x00000003)
- {
- Float4 masked;
-
- masked = value;
- oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
- masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
- oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
- }
-
- oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
- oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
- *Pointer<Float4>(buffer) = oC.y;
- break;
- case VK_FORMAT_R16G16_SFLOAT:
- if((rgbaWriteMask & 0x00000003) != 0x0)
- {
- buffer += 4 * x;
-
- UInt2 rgbaMask;
- UInt2 packedCol;
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
-
- UShort4 value = *Pointer<UShort4>(buffer);
- UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
- if((rgbaWriteMask & 0x3) != 0x3)
- {
- Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
- rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
- mergedMask &= rgbaMask;
- }
- *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
- value = *Pointer<UShort4>(buffer);
- mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
- if((rgbaWriteMask & 0x3) != 0x3)
- {
- mergedMask &= rgbaMask;
- }
- *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
- }
- break;
- case VK_FORMAT_R16G16_SINT:
- case VK_FORMAT_R16G16_UINT:
- if((rgbaWriteMask & 0x00000003) != 0x0)
- {
- buffer += 4 * x;
-
- UInt2 rgbaMask;
- UShort4 packedCol = UShort4(As<Int4>(oC.x));
- UShort4 value = *Pointer<UShort4>(buffer);
- UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
- if((rgbaWriteMask & 0x3) != 0x3)
- {
- Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
- rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
- mergedMask &= rgbaMask;
- }
- *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
- packedCol = UShort4(As<Int4>(oC.y));
- value = *Pointer<UShort4>(buffer);
- mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
- if((rgbaWriteMask & 0x3) != 0x3)
- {
- mergedMask &= rgbaMask;
- }
- *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
- }
- break;
- case VK_FORMAT_R8G8_SINT:
- case VK_FORMAT_R8G8_UINT:
- if((rgbaWriteMask & 0x00000003) != 0x0)
- {
- buffer += 2 * x;
-
- Int2 xyzw, packedCol;
-
- xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
-
- if(targetFormat == VK_FORMAT_R8G8_SINT)
- {
- packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
- }
- else
- {
- packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
- }
-
- UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
- if((rgbaWriteMask & 0x3) != 0x3)
- {
- Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
- UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
- mergedMask &= rgbaMask;
- }
-
- packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
-
- *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
- }
- break;
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- buffer += 16 * x;
-
- {
- value = *Pointer<Float4>(buffer, 16);
-
- if(rgbaWriteMask != 0x0000000F)
+ if((rgbaWriteMask & 0x00000003) != 0x00000003)
{
Float4 masked = value;
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
- masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
+ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
}
- oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ01X) + xMask * 16, 16));
oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
- *Pointer<Float4>(buffer, 16) = oC.x;
- }
+ *Pointer<Float4>(buffer) = oC.x;
- {
- value = *Pointer<Float4>(buffer + 16, 16);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- if(rgbaWriteMask != 0x0000000F)
+ value = *Pointer<Float4>(buffer);
+
+ if((rgbaWriteMask & 0x00000003) != 0x00000003)
{
- Float4 masked = value;
- oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
- masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
+ Float4 masked;
+
+ masked = value;
+ oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
+ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
}
- oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
+ oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ23X) + xMask * 16, 16));
oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
- *Pointer<Float4>(buffer + 16, 16) = oC.y;
- }
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
-
- {
- value = *Pointer<Float4>(buffer, 16);
-
- if(rgbaWriteMask != 0x0000000F)
+ *Pointer<Float4>(buffer) = oC.y;
+ break;
+ case VK_FORMAT_R16G16_SFLOAT:
+ if((rgbaWriteMask & 0x00000003) != 0x0)
{
- Float4 masked = value;
- oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
- masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
- oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
+ buffer += 4 * x;
+
+ UInt2 rgbaMask;
+ UInt2 packedCol;
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
+
+ UShort4 value = *Pointer<UShort4>(buffer);
+ UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if((rgbaWriteMask & 0x3) != 0x3)
+ {
+ Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
+ rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
+ value = *Pointer<UShort4>(buffer);
+ mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if((rgbaWriteMask & 0x3) != 0x3)
+ {
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+ }
+ break;
+ case VK_FORMAT_R16G16_SINT:
+ case VK_FORMAT_R16G16_UINT:
+ if((rgbaWriteMask & 0x00000003) != 0x0)
+ {
+ buffer += 4 * x;
+
+ UInt2 rgbaMask;
+ UShort4 packedCol = UShort4(As<Int4>(oC.x));
+ UShort4 value = *Pointer<UShort4>(buffer);
+ UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if((rgbaWriteMask & 0x3) != 0x3)
+ {
+ Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
+ rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ packedCol = UShort4(As<Int4>(oC.y));
+ value = *Pointer<UShort4>(buffer);
+ mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if((rgbaWriteMask & 0x3) != 0x3)
+ {
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
+ }
+ break;
+ case VK_FORMAT_R8G8_SINT:
+ case VK_FORMAT_R8G8_UINT:
+ if((rgbaWriteMask & 0x00000003) != 0x0)
+ {
+ buffer += 2 * x;
+
+ Int2 xyzw, packedCol;
+
+ xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
+
+ if(targetFormat == VK_FORMAT_R8G8_SINT)
+ {
+ packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ }
+ else
+ {
+ packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ }
+
+ UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
+ if((rgbaWriteMask & 0x3) != 0x3)
+ {
+ Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
+ UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+ mergedMask &= rgbaMask;
+ }
+
+ packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
+
+ *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
+ buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
+ }
+ break;
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ buffer += 16 * x;
+
+ {
+ value = *Pointer<Float4>(buffer, 16);
+
+ if(rgbaWriteMask != 0x0000000F)
+ {
+ Float4 masked = value;
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+ oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
+ }
+
+ oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskX0X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX0X) + xMask * 16, 16));
+ oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
+ *Pointer<Float4>(buffer, 16) = oC.x;
}
- oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
- oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
- *Pointer<Float4>(buffer, 16) = oC.z;
- }
-
- {
- value = *Pointer<Float4>(buffer + 16, 16);
-
- if(rgbaWriteMask != 0x0000000F)
{
- Float4 masked = value;
- oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
- masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
- oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
- }
+ value = *Pointer<Float4>(buffer + 16, 16);
- oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
- value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
- oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
- *Pointer<Float4>(buffer + 16, 16) = oC.w;
- }
- break;
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- if((rgbaWriteMask & 0x0000000F) != 0x0)
- {
- buffer += 8 * x;
+ if(rgbaWriteMask != 0x0000000F)
+ {
+ Float4 masked = value;
+ oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+ oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
+ }
- UInt4 rgbaMask;
- UInt4 value = *Pointer<UInt4>(buffer);
- UInt4 packedCol;
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
- UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
- if((rgbaWriteMask & 0xF) != 0xF)
- {
- UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
- rgbaMask = UInt4(tmpMask, tmpMask);
- mergedMask &= rgbaMask;
+ oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskX1X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX1X) + xMask * 16, 16));
+ oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
+ *Pointer<Float4>(buffer + 16, 16) = oC.y;
}
- *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = *Pointer<UInt4>(buffer);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
- packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
- mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
- if((rgbaWriteMask & 0xF) != 0xF)
{
- mergedMask &= rgbaMask;
+ value = *Pointer<Float4>(buffer, 16);
+
+ if(rgbaWriteMask != 0x0000000F)
+ {
+ Float4 masked = value;
+ oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+ oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
+ }
+
+ oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskX2X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX2X) + xMask * 16, 16));
+ oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
+ *Pointer<Float4>(buffer, 16) = oC.z;
}
- *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
- }
- break;
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- if((rgbaWriteMask & 0x7) != 0x0)
- {
- buffer += 4 * x;
- unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) |
- ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) |
- ((rgbaWriteMask & 0x4) ? 0xFFC00000 : 0);
- UInt2 mergedMask(mask, mask);
-
- UInt2 value;
- value = Insert(value, r11g11b10Pack(oC.x), 0);
- value = Insert(value, r11g11b10Pack(oC.y), 1);
- *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = Insert(value, r11g11b10Pack(oC.z), 0);
- value = Insert(value, r11g11b10Pack(oC.w), 1);
- *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
- }
- break;
- case VK_FORMAT_R16G16B16A16_SINT:
- case VK_FORMAT_R16G16B16A16_UINT:
- if((rgbaWriteMask & 0x0000000F) != 0x0)
- {
- buffer += 8 * x;
-
- UInt4 rgbaMask;
- UShort8 value = *Pointer<UShort8>(buffer);
- UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
- UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
- if((rgbaWriteMask & 0xF) != 0xF)
{
- UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
- rgbaMask = UInt4(tmpMask, tmpMask);
- mergedMask &= rgbaMask;
+ value = *Pointer<Float4>(buffer + 16, 16);
+
+ if(rgbaWriteMask != 0x0000000F)
+ {
+ Float4 masked = value;
+ oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
+ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
+ oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
+ }
+
+ oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskX3X) + xMask * 16, 16));
+ value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX3X) + xMask * 16, 16));
+ oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
+ *Pointer<Float4>(buffer + 16, 16) = oC.w;
}
- *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
-
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
-
- value = *Pointer<UShort8>(buffer);
- packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
- mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
- if((rgbaWriteMask & 0xF) != 0xF)
+ break;
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ if((rgbaWriteMask & 0x0000000F) != 0x0)
{
- mergedMask &= rgbaMask;
- }
- *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
- }
- break;
- case VK_FORMAT_R8G8B8A8_SINT:
- case VK_FORMAT_R8G8B8A8_UINT:
- case VK_FORMAT_A8B8G8R8_UINT_PACK32:
- case VK_FORMAT_A8B8G8R8_SINT_PACK32:
- if((rgbaWriteMask & 0x0000000F) != 0x0)
- {
- UInt2 value, packedCol, mergedMask;
+ buffer += 8 * x;
- buffer += 4 * x;
+ UInt4 rgbaMask;
+ UInt4 value = *Pointer<UInt4>(buffer);
+ UInt4 packedCol;
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
+ UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
+ if((rgbaWriteMask & 0xF) != 0xF)
+ {
+ UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+ rgbaMask = UInt4(tmpMask, tmpMask);
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
- bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- if(isSigned)
- {
- packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ value = *Pointer<UInt4>(buffer);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
+ packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
+ mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
+ if((rgbaWriteMask & 0xF) != 0xF)
+ {
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
}
- else
+ break;
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ if((rgbaWriteMask & 0x7) != 0x0)
{
- packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
- }
- value = *Pointer<UInt2>(buffer, 16);
- mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
- if(rgbaWriteMask != 0xF)
- {
- mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
- }
- *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
+ buffer += 4 * x;
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) |
+ ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) |
+ ((rgbaWriteMask & 0x4) ? 0xFFC00000 : 0);
+ UInt2 mergedMask(mask, mask);
- if(isSigned)
- {
- packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+ UInt2 value;
+ value = Insert(value, r11g11b10Pack(oC.x), 0);
+ value = Insert(value, r11g11b10Pack(oC.y), 1);
+ *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ value = Insert(value, r11g11b10Pack(oC.z), 0);
+ value = Insert(value, r11g11b10Pack(oC.w), 1);
+ *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
}
- else
+ break;
+ case VK_FORMAT_R16G16B16A16_SINT:
+ case VK_FORMAT_R16G16B16A16_UINT:
+ if((rgbaWriteMask & 0x0000000F) != 0x0)
{
- packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
- }
- value = *Pointer<UInt2>(buffer, 16);
- mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
- if(rgbaWriteMask != 0xF)
- {
- mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
- }
- *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
- }
- break;
- case VK_FORMAT_A2B10G10R10_UINT_PACK32:
- if((rgbaWriteMask & 0x0000000F) != 0x0)
- {
- Int2 mergedMask, packedCol, value;
- Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
- ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
- ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
- ((As<Int4>(oC.x) & Int4(0x3ff)));
+ buffer += 8 * x;
- buffer += 4 * x;
- value = *Pointer<Int2>(buffer, 16);
- mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
- if(rgbaWriteMask != 0xF)
- {
- mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
- }
- *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
+ UInt4 rgbaMask;
+ UShort8 value = *Pointer<UShort8>(buffer);
+ UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
+ UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
+ if((rgbaWriteMask & 0xF) != 0xF)
+ {
+ UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
+ rgbaMask = UInt4(tmpMask, tmpMask);
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = *Pointer<Int2>(buffer, 16);
- mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
- if(rgbaWriteMask != 0xF)
- {
- mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+ value = *Pointer<UShort8>(buffer);
+ packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
+ mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
+ if((rgbaWriteMask & 0xF) != 0xF)
+ {
+ mergedMask &= rgbaMask;
+ }
+ *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
}
- *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
- }
- break;
- default:
- UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
+ break;
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_R8G8B8A8_UINT:
+ case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+ case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+ if((rgbaWriteMask & 0x0000000F) != 0x0)
+ {
+ UInt2 value, packedCol, mergedMask;
+
+ buffer += 4 * x;
+
+ bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
+
+ if(isSigned)
+ {
+ packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ }
+ else
+ {
+ packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ }
+ value = *Pointer<UInt2>(buffer, 16);
+ mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if(rgbaWriteMask != 0xF)
+ {
+ mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
+ }
+ *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ if(isSigned)
+ {
+ packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+ }
+ else
+ {
+ packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+ }
+ value = *Pointer<UInt2>(buffer, 16);
+ mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if(rgbaWriteMask != 0xF)
+ {
+ mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
+ }
+ *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
+ }
+ break;
+ case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+ if((rgbaWriteMask & 0x0000000F) != 0x0)
+ {
+ Int2 mergedMask, packedCol, value;
+ Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
+ ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
+ ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
+ ((As<Int4>(oC.x) & Int4(0x3ff)));
+
+ buffer += 4 * x;
+ value = *Pointer<Int2>(buffer, 16);
+ mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if(rgbaWriteMask != 0xF)
+ {
+ mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+ }
+ *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
+
+ buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ value = *Pointer<Int2>(buffer, 16);
+ mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if(rgbaWriteMask != 0xF)
+ {
+ mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+ }
+ *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
+ }
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
}
}
@@ -2604,7 +2605,7 @@
void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
{
- Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
+ Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16);
c.x = As<UShort4>(c.x) >> 4;
c.y = As<UShort4>(c.y) >> 4;
@@ -2637,7 +2638,7 @@
void PixelRoutine::linearToSRGB12_16(Vector4s &c)
{
- Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
+ Pointer<Byte> LUT = constants + OFFSET(Constants, linearToSRGB12_16);
c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
@@ -2655,7 +2656,7 @@
c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
}
-Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
+Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
{
Float4 linear = x * x;
linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 678d780..34722ad 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -26,16 +26,16 @@
{
public:
PixelRoutine(const PixelProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader,
- const vk::DescriptorSet::Bindings &descriptorSets);
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader,
+ const vk::DescriptorSet::Bindings &descriptorSets);
virtual ~PixelRoutine();
protected:
- Float4 z[4]; // Multisampled z
- Float4 w; // Used as is
- Float4 rhw; // Reciprocal w
+ Float4 z[4]; // Multisampled z
+ Float4 w; // Used as is
+ Float4 rhw; // Reciprocal w
SpirvRoutine routine;
const vk::DescriptorSet::Bindings &descriptorSets;
@@ -43,7 +43,7 @@
// Depth output
Float4 oDepth;
- virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) = 0;
+ virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]) = 0;
virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0;
virtual Bool alphaTest(Int cMask[4]) = 0;
virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0;
@@ -55,7 +55,7 @@
// Raster operations
void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x);
- void writeColor(int index, const Pointer<Byte> &cBuffer, const Int& x, Vector4f& oC, const Int& sMask, const Int& zMask, const Int& cMask);
+ void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &oC, const Int &sMask, const Int &zMask, const Int &cMask);
void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4f &oC, const Int &x);
void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask);
@@ -93,4 +93,4 @@
} // namespace sw
-#endif // sw_PixelRoutine_hpp
+#endif // sw_PixelRoutine_hpp
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index b2c2268..10d848e 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -14,35 +14,35 @@
#include "SamplerCore.hpp"
-#include "PixelRoutine.hpp"
#include "Constants.hpp"
-#include "Vulkan/VkSampler.hpp"
+#include "PixelRoutine.hpp"
#include "Vulkan/VkDebug.hpp"
+#include "Vulkan/VkSampler.hpp"
#include <limits>
namespace {
-void applySwizzle(VkComponentSwizzle swizzle, sw::Float4& f, const sw::Vector4f& c, bool integer)
+void applySwizzle(VkComponentSwizzle swizzle, sw::Float4 &f, const sw::Vector4f &c, bool integer)
{
switch(swizzle)
{
- case VK_COMPONENT_SWIZZLE_R: f = c.x; break;
- case VK_COMPONENT_SWIZZLE_G: f = c.y; break;
- case VK_COMPONENT_SWIZZLE_B: f = c.z; break;
- case VK_COMPONENT_SWIZZLE_A: f = c.w; break;
- case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
- case VK_COMPONENT_SWIZZLE_ONE:
- if(integer)
- {
- f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1));
- }
- else
- {
- f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f);
- }
- break;
- default: ASSERT(false);
+ case VK_COMPONENT_SWIZZLE_R: f = c.x; break;
+ case VK_COMPONENT_SWIZZLE_G: f = c.y; break;
+ case VK_COMPONENT_SWIZZLE_B: f = c.z; break;
+ case VK_COMPONENT_SWIZZLE_A: f = c.w; break;
+ case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
+ case VK_COMPONENT_SWIZZLE_ONE:
+ if(integer)
+ {
+ f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1));
+ }
+ else
+ {
+ f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f);
+ }
+ break;
+ default: ASSERT(false);
}
}
@@ -50,11 +50,13 @@
namespace sw {
-SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state) : constants(constants), state(state)
+SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state)
+ : constants(constants)
+ , state(state)
{
}
-Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4& sampleId, SamplerFunction function)
+Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4 &sampleId, SamplerFunction function)
{
Vector4f c;
@@ -119,7 +121,8 @@
{
lod = Float(0);
}
- else UNREACHABLE("Sampler function %d", int(function));
+ else
+ UNREACHABLE("Sampler function %d", int(function));
if(function != Base && function != Fetch && function != Gather)
{
@@ -139,7 +142,7 @@
}
c.x = lod;
- // c.y contains unclamped LOD.
+ // c.y contains unclamped LOD.
return c;
}
@@ -159,31 +162,85 @@
{
switch(state.textureFormat)
{
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ c.x *= Float4(1.0f / 0xF800);
+ c.y *= Float4(1.0f / 0xFC00);
+ c.z *= Float4(1.0f / 0xF800);
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ c.x *= Float4(1.0f / 0xF000);
+ c.y *= Float4(1.0f / 0xF000);
+ c.z *= Float4(1.0f / 0xF000);
+ c.w *= Float4(1.0f / 0xF000);
+ break;
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ c.x *= Float4(1.0f / 0xF800);
+ c.y *= Float4(1.0f / 0xF800);
+ c.z *= Float4(1.0f / 0xF800);
+ c.w *= Float4(1.0f / 0x8000);
+ break;
+ case VK_FORMAT_R8_SNORM:
+ case VK_FORMAT_R8G8_SNORM:
+ case VK_FORMAT_R8G8B8A8_SNORM:
+ case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
+ c.x *= Float4(1.0f / 0x7F00);
+ c.y *= Float4(1.0f / 0x7F00);
+ c.z *= Float4(1.0f / 0x7F00);
+ c.w *= Float4(1.0f / 0x7F00);
+ break;
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_R8_SRGB:
+ case VK_FORMAT_R8G8_SRGB:
+ c.x *= Float4(1.0f / 0xFF00u);
+ c.y *= Float4(1.0f / 0xFF00u);
+ c.z *= Float4(1.0f / 0xFF00u);
+ c.w *= Float4(1.0f / 0xFF00u);
+ break;
+ default:
+ for(int component = 0; component < textureComponentCount(); component++)
+ {
+ c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
+ }
+ }
+ }
+ }
+ else // 16-bit filtering.
+ {
+ Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, function);
+
+ switch(state.textureFormat)
+ {
case VK_FORMAT_R5G6B5_UNORM_PACK16:
- c.x *= Float4(1.0f / 0xF800);
- c.y *= Float4(1.0f / 0xFC00);
- c.z *= Float4(1.0f / 0xF800);
+ c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
+ c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
+ c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
break;
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
- c.x *= Float4(1.0f / 0xF000);
- c.y *= Float4(1.0f / 0xF000);
- c.z *= Float4(1.0f / 0xF000);
- c.w *= Float4(1.0f / 0xF000);
+ c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
+ c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
+ c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
+ c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- c.x *= Float4(1.0f / 0xF800);
- c.y *= Float4(1.0f / 0xF800);
- c.z *= Float4(1.0f / 0xF800);
- c.w *= Float4(1.0f / 0x8000);
+ c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
+ c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
+ c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
+ c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000);
break;
case VK_FORMAT_R8_SNORM:
case VK_FORMAT_R8G8_SNORM:
case VK_FORMAT_R8G8B8A8_SNORM:
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
- c.x *= Float4(1.0f / 0x7F00);
- c.y *= Float4(1.0f / 0x7F00);
- c.z *= Float4(1.0f / 0x7F00);
- c.w *= Float4(1.0f / 0x7F00);
+ c.x = Float4(cs.x) * Float4(1.0f / 0x7F00);
+ c.y = Float4(cs.y) * Float4(1.0f / 0x7F00);
+ c.z = Float4(cs.z) * Float4(1.0f / 0x7F00);
+ c.w = Float4(cs.w) * Float4(1.0f / 0x7F00);
break;
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8G8_UNORM:
@@ -194,77 +251,23 @@
case VK_FORMAT_R8G8B8A8_SRGB:
case VK_FORMAT_R8_SRGB:
case VK_FORMAT_R8G8_SRGB:
- c.x *= Float4(1.0f / 0xFF00u);
- c.y *= Float4(1.0f / 0xFF00u);
- c.z *= Float4(1.0f / 0xFF00u);
- c.w *= Float4(1.0f / 0xFF00u);
+ c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u);
+ c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u);
+ c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u);
+ c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u);
break;
default:
for(int component = 0; component < textureComponentCount(); component++)
{
- c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
+ if(hasUnsignedTextureComponent(component))
+ {
+ convertUnsigned16(c[component], cs[component]);
+ }
+ else
+ {
+ convertSigned15(c[component], cs[component]);
+ }
}
- }
- }
- }
- else // 16-bit filtering.
- {
- Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, function);
-
- switch(state.textureFormat)
- {
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
- break;
- case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
- c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
- break;
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
- c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000);
- break;
- case VK_FORMAT_R8_SNORM:
- case VK_FORMAT_R8G8_SNORM:
- case VK_FORMAT_R8G8B8A8_SNORM:
- case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
- c.x = Float4(cs.x) * Float4(1.0f / 0x7F00);
- c.y = Float4(cs.y) * Float4(1.0f / 0x7F00);
- c.z = Float4(cs.z) * Float4(1.0f / 0x7F00);
- c.w = Float4(cs.w) * Float4(1.0f / 0x7F00);
- break;
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_R8_SRGB:
- case VK_FORMAT_R8G8_SRGB:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u);
- c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u);
- break;
- default:
- for(int component = 0; component < textureComponentCount(); component++)
- {
- if(hasUnsignedTextureComponent(component))
- {
- convertUnsigned16(c[component], cs[component]);
- }
- else
- {
- convertSigned15(c[component], cs[component]);
- }
- }
}
}
@@ -321,27 +324,27 @@
{
switch(count)
{
- case -1: return uvw - offset;
- case 0: return uvw;
- case +1: return uvw + offset;
- case 2: return uvw + offset + offset;
+ case -1: return uvw - offset;
+ case 0: return uvw;
+ case +1: return uvw + offset;
+ case 2: return uvw + offset + offset;
}
}
- else // Clamp or mirror
+ else // Clamp or mirror
{
switch(count)
{
- case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
- case 0: return uvw;
- case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
- case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
+ case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
+ case 0: return uvw;
+ case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
+ case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
}
}
return uvw;
}
-Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
+Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
{
Vector4s c = sampleAniso(texture, u, v, w, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, false, function);
@@ -356,21 +359,45 @@
lod *= Float(1 << 16);
- UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize
- Short4 stri = utri >> 1; // FIXME: Optimize
+ UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize
+ Short4 stri = utri >> 1; // FIXME: Optimize
- if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
- if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
- if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
- if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
+ if(hasUnsignedTextureComponent(0))
+ cc.x = MulHigh(As<UShort4>(cc.x), utri);
+ else
+ cc.x = MulHigh(cc.x, stri);
+ if(hasUnsignedTextureComponent(1))
+ cc.y = MulHigh(As<UShort4>(cc.y), utri);
+ else
+ cc.y = MulHigh(cc.y, stri);
+ if(hasUnsignedTextureComponent(2))
+ cc.z = MulHigh(As<UShort4>(cc.z), utri);
+ else
+ cc.z = MulHigh(cc.z, stri);
+ if(hasUnsignedTextureComponent(3))
+ cc.w = MulHigh(As<UShort4>(cc.w), utri);
+ else
+ cc.w = MulHigh(cc.w, stri);
utri = ~utri;
stri = Short4(0x7FFF) - stri;
- if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
- if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
- if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
- if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
+ if(hasUnsignedTextureComponent(0))
+ c.x = MulHigh(As<UShort4>(c.x), utri);
+ else
+ c.x = MulHigh(c.x, stri);
+ if(hasUnsignedTextureComponent(1))
+ c.y = MulHigh(As<UShort4>(c.y), utri);
+ else
+ c.y = MulHigh(c.y, stri);
+ if(hasUnsignedTextureComponent(2))
+ c.z = MulHigh(As<UShort4>(c.z), utri);
+ else
+ c.z = MulHigh(c.z, stri);
+ if(hasUnsignedTextureComponent(3))
+ c.w = MulHigh(As<UShort4>(c.w), utri);
+ else
+ c.w = MulHigh(c.w, stri);
c.x += cc.x;
c.y += cc.y;
@@ -386,7 +413,7 @@
return c;
}
-Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
{
Vector4s c;
@@ -405,9 +432,9 @@
cSum.z = Short4(0);
cSum.w = Short4(0);
- Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
- Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
- UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
+ Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * a);
+ Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * a);
+ UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants, cWeight) + 8 * a);
Short4 sw = Short4(cw >> 1);
Float4 du = uDelta;
@@ -428,25 +455,49 @@
u0 += du;
v0 += dv;
- if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
- if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
- if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
- if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
+ if(hasUnsignedTextureComponent(0))
+ cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw));
+ else
+ cSum.x += MulHigh(c.x, sw);
+ if(hasUnsignedTextureComponent(1))
+ cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw));
+ else
+ cSum.y += MulHigh(c.y, sw);
+ if(hasUnsignedTextureComponent(2))
+ cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw));
+ else
+ cSum.z += MulHigh(c.z, sw);
+ if(hasUnsignedTextureComponent(3))
+ cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw));
+ else
+ cSum.w += MulHigh(c.w, sw);
i++;
}
Until(i >= a);
- if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
- if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
- if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
- if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
+ if(hasUnsignedTextureComponent(0))
+ c.x = cSum.x;
+ else
+ c.x = AddSat(cSum.x, cSum.x);
+ if(hasUnsignedTextureComponent(1))
+ c.y = cSum.y;
+ else
+ c.y = AddSat(cSum.y, cSum.y);
+ if(hasUnsignedTextureComponent(2))
+ c.z = cSum.z;
+ else
+ c.z = AddSat(cSum.z, cSum.z);
+ if(hasUnsignedTextureComponent(3))
+ c.w = cSum.w;
+ else
+ c.w = AddSat(cSum.w, cSum.w);
}
return c;
}
-Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
{
if(state.textureType != VK_IMAGE_VIEW_TYPE_3D)
{
@@ -458,7 +509,7 @@
}
}
-Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
{
Vector4s c;
@@ -487,21 +538,21 @@
}
else
{
- Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
- Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
- Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
- Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
+ Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
+ Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
+ Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
+ Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function);
- if(!gather) // Blend
+ if(!gather) // Blend
{
// Fractions
- UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,width)));
- UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,height)));
+ UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width)));
+ UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height)));
UShort4 f1u = ~f0u;
UShort4 f1v = ~f0v;
@@ -532,7 +583,7 @@
{
c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u);
c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u);
- c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
+ c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
}
else
{
@@ -552,7 +603,7 @@
}
c.x = (c00.x + c10.x) + (c01.x + c11.x);
- if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions
+ if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions
}
}
@@ -562,7 +613,7 @@
{
c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u);
c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u);
- c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
+ c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
}
else
{
@@ -582,7 +633,7 @@
}
c.y = (c00.y + c10.y) + (c01.y + c11.y);
- if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions
+ if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions
}
}
@@ -592,7 +643,7 @@
{
c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u);
c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u);
- c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
+ c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
}
else
{
@@ -612,7 +663,7 @@
}
c.z = (c00.z + c10.z) + (c01.z + c11.z);
- if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions
+ if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions
}
}
@@ -622,7 +673,7 @@
{
c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u);
c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u);
- c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
+ c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
}
else
{
@@ -642,7 +693,7 @@
}
c.w = (c00.w + c10.w) + (c01.w + c11.w);
- if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions
+ if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions
}
}
}
@@ -651,16 +702,16 @@
VkComponentSwizzle swizzle = gatherSwizzle();
switch(swizzle)
{
- case VK_COMPONENT_SWIZZLE_ZERO:
- case VK_COMPONENT_SWIZZLE_ONE:
- // Handled at the final component swizzle.
- break;
- default:
- c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
- break;
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ case VK_COMPONENT_SWIZZLE_ONE:
+ // Handled at the final component swizzle.
+ break;
+ default:
+ c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+ break;
}
}
}
@@ -668,7 +719,7 @@
return c;
}
-Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
{
Vector4s c_;
@@ -708,17 +759,17 @@
{
for(int k = 0; k < 2; k++)
{
- u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
- v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
- s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
+ u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
+ v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
+ s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap, wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
}
}
}
// Fractions
- UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,width)));
- UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,height)));
- UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,depth)));
+ UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width)));
+ UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height)));
+ UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, depth)));
UShort4 f1u = ~f0u;
UShort4 f1v = ~f0v;
@@ -766,10 +817,34 @@
{
c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, cubeArrayId, sampleId, buffer, function);
- if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
- if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
- if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
- if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
+ if(componentCount >= 1)
+ {
+ if(hasUnsignedTextureComponent(0))
+ c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]);
+ else
+ c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]);
+ }
+ if(componentCount >= 2)
+ {
+ if(hasUnsignedTextureComponent(1))
+ c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]);
+ else
+ c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]);
+ }
+ if(componentCount >= 3)
+ {
+ if(hasUnsignedTextureComponent(2))
+ c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]);
+ else
+ c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]);
+ }
+ if(componentCount >= 4)
+ {
+ if(hasUnsignedTextureComponent(3))
+ c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]);
+ else
+ c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]);
+ }
if(i != 0 || j != 0 || k != 0)
{
@@ -788,16 +863,20 @@
if(componentCount >= 4) c_.w = c[0][0][0].w;
// Correct for signed fractions
- if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
- if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
- if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
- if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
+ if(componentCount >= 1)
+ if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
+ if(componentCount >= 2)
+ if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
+ if(componentCount >= 3)
+ if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
+ if(componentCount >= 4)
+ if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
}
return c_;
}
-Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
+Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
{
Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, false, function);
@@ -821,7 +900,7 @@
return c;
}
-Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
{
Vector4f c;
@@ -840,8 +919,8 @@
cSum.z = Float4(0.0f);
cSum.w = Float4(0.0f);
- Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
- Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
+ Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * a);
+ Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * a);
Float4 du = uDelta;
Float4 dv = vDelta;
@@ -879,7 +958,7 @@
return c;
}
-Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
{
if(state.textureType != VK_IMAGE_VIEW_TYPE_3D)
{
@@ -891,7 +970,7 @@
}
}
-Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
{
Vector4f c;
@@ -935,7 +1014,7 @@
Vector4f c01 = sampleTexel(x0, y1, z0, q, mipmap, cubeArrayId, sampleId, buffer, function);
Vector4f c11 = sampleTexel(x1, y1, z0, q, mipmap, cubeArrayId, sampleId, buffer, function);
- if(!gather) // Blend
+ if(!gather) // Blend
{
if(componentCount >= 1) c00.x = c00.x + fu * (c10.x - c00.x);
if(componentCount >= 2) c00.y = c00.y + fu * (c10.y - c00.y);
@@ -957,16 +1036,16 @@
VkComponentSwizzle swizzle = gatherSwizzle();
switch(swizzle)
{
- case VK_COMPONENT_SWIZZLE_ZERO:
- case VK_COMPONENT_SWIZZLE_ONE:
- // Handled at the final component swizzle.
- break;
- default:
- c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
- c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
- break;
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ case VK_COMPONENT_SWIZZLE_ONE:
+ // Handled at the final component swizzle.
+ break;
+ default:
+ c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+ c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+ break;
}
}
}
@@ -974,7 +1053,7 @@
return c;
}
-Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function)
+Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function)
{
Vector4f c;
@@ -1065,18 +1144,18 @@
Float SamplerCore::log2sqrt(Float lod)
{
// log2(sqrt(lod)) // Equals 0.25 * log2(lod^2).
- lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
- lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
- lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length).
+ lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
+ lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
+ lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length).
return lod;
}
Float SamplerCore::log2(Float lod)
{
- lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
- lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
- lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length).
+ lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
+ lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
+ lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length).
return lod;
}
@@ -1085,7 +1164,7 @@
{
Float4 duvdxy;
- if(function != Grad) // Implicit
+ if(function != Grad) // Implicit
{
duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
}
@@ -1103,7 +1182,7 @@
Float4 dUV2dxy = dUVdxy * dUVdxy;
Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
- lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis
+ lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis
if(state.textureFilter == FILTER_ANISOTROPIC)
{
@@ -1119,12 +1198,12 @@
vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
anisotropy = lod * Rcp_pp(det);
- anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler,maxAnisotropy)));
+ anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler, maxAnisotropy)));
lod *= Rcp_pp(anisotropy * anisotropy);
}
- lod = log2sqrt(lod); // log2(sqrt(lod))
+ lod = log2sqrt(lod); // log2(sqrt(lod))
}
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function)
@@ -1160,10 +1239,10 @@
dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);
- lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
+ lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
// Scale by texture dimension.
- lod *= *Pointer<Float>(texture + OFFSET(Texture,width));
+ lod *= *Pointer<Float>(texture + OFFSET(Texture, width));
lod = log2(lod);
}
@@ -1172,7 +1251,7 @@
{
Float4 dudxy, dvdxy, dsdxy;
- if(function != Grad) // Implicit
+ if(function != Grad) // Implicit
{
dudxy = uuuu - uuuu.xxxx;
dvdxy = vvvv - vvvv.xxxx;
@@ -1197,9 +1276,9 @@
dudxy += dvdxy;
dudxy += dsdxy;
- lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
+ lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z);
- lod = log2sqrt(lod); // log2(sqrt(lod))
+ lod = log2sqrt(lod); // log2(sqrt(lod))
}
Int4 SamplerCore::cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
@@ -1207,20 +1286,20 @@
// TODO: Comply with Vulkan recommendation:
// Vulkan 1.1: "The rules should have as the first rule that rz wins over ry and rx, and the second rule that ry wins over rx."
- Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0
- Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0
- Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0
+ Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0
+ Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0
+ Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0
Float4 absX = Abs(x);
Float4 absY = Abs(y);
Float4 absZ = Abs(z);
- Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y)
- Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z)
- Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x)
- Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z)
- Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x)
- Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y)
+ Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y)
+ Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z)
+ Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x)
+ Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z)
+ Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x)
+ Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y)
// FACE_POSITIVE_X = 000b
// FACE_NEGATIVE_X = 001b
@@ -1235,14 +1314,14 @@
Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
Int negative = SignMask(n);
- Int faces = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
- faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
- faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
+ Int faces = *Pointer<Int>(constants + OFFSET(Constants, transposeBit0) + negative * 4);
+ faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit1) + yAxis * 4);
+ faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit2) + zAxis * 4);
Int4 face;
face.x = faces & 0x7;
- face.y = (faces >> 4) & 0x7;
- face.z = (faces >> 8) & 0x7;
+ face.y = (faces >> 4) & 0x7;
+ face.z = (faces >> 8) & 0x7;
face.w = (faces >> 12) & 0x7;
M = Max(Max(absX, absY), Max(absZ, Float4(std::numeric_limits<float>::min())));
@@ -1267,27 +1346,27 @@
switch(mode)
{
- case AddressingMode::ADDRESSING_WRAP:
- tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd;
- break;
- case AddressingMode::ADDRESSING_CLAMP:
- case AddressingMode::ADDRESSING_MIRROR:
- case AddressingMode::ADDRESSING_MIRRORONCE:
- case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
- tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
- break;
- case ADDRESSING_TEXELFETCH:
- break;
- case AddressingMode::ADDRESSING_SEAMLESS:
- ASSERT(false); // Cube sampling doesn't support offset.
- default:
- ASSERT(false);
+ case AddressingMode::ADDRESSING_WRAP:
+ tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd;
+ break;
+ case AddressingMode::ADDRESSING_CLAMP:
+ case AddressingMode::ADDRESSING_MIRROR:
+ case AddressingMode::ADDRESSING_MIRRORONCE:
+ case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
+ tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
+ break;
+ case ADDRESSING_TEXELFETCH:
+ break;
+ case AddressingMode::ADDRESSING_SEAMLESS:
+ ASSERT(false); // Cube sampling doesn't support offset.
+ default:
+ ASSERT(false);
}
return As<Short4>(UShort4(tmp));
}
-void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, SamplerFunction function)
+void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function)
{
bool texelFetch = (function == Fetch);
bool hasOffset = (function.offset != 0);
@@ -1309,8 +1388,8 @@
Short4 uuu2 = uuuu;
uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
- uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
- uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
+ uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP))));
+ uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP))));
if(hasThirdCoordinate())
{
@@ -1380,7 +1459,7 @@
}
}
-void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, SamplerFunction function)
+void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function)
{
UInt4 indices = uuuu + vvvv;
@@ -1426,32 +1505,32 @@
switch(state.textureFormat)
{
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- c.z = (c.x & Short4(0x001Fu)) << 11;
- c.y = (c.x & Short4(0x07E0u)) << 5;
- c.x = (c.x & Short4(0xF800u));
- break;
- case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
- c.w = (c.x << 12) & Short4(0xF000u);
- c.z = (c.x) & Short4(0xF000u);
- c.y = (c.x << 4) & Short4(0xF000u);
- c.x = (c.x << 8) & Short4(0xF000u);
- break;
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- c.w = (c.x) & Short4(0x8000u);
- c.z = (c.x << 11) & Short4(0xF800u);
- c.y = (c.x << 6) & Short4(0xF800u);
- c.x = (c.x << 1) & Short4(0xF800u);
- break;
- default:
- ASSERT(false);
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ c.z = (c.x & Short4(0x001Fu)) << 11;
+ c.y = (c.x & Short4(0x07E0u)) << 5;
+ c.x = (c.x & Short4(0xF800u));
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ c.w = (c.x << 12) & Short4(0xF000u);
+ c.z = (c.x) & Short4(0xF000u);
+ c.y = (c.x << 4) & Short4(0xF000u);
+ c.x = (c.x << 8) & Short4(0xF000u);
+ break;
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ c.w = (c.x) & Short4(0x8000u);
+ c.z = (c.x << 11) & Short4(0xF800u);
+ c.y = (c.x << 6) & Short4(0xF800u);
+ c.x = (c.x << 1) & Short4(0xF800u);
+ break;
+ default:
+ ASSERT(false);
}
}
else if(has8bitTextureComponents())
{
switch(textureComponentCount())
{
- case 4:
+ case 4:
{
Byte4 c0 = Pointer<Byte4>(buffer)[index[0]];
Byte4 c1 = Pointer<Byte4>(buffer)[index[1]];
@@ -1462,80 +1541,80 @@
switch(state.textureFormat)
{
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SRGB:
- c.z = As<Short4>(UnpackLow(c.x, c.y));
- c.x = As<Short4>(UnpackHigh(c.x, c.y));
- c.y = c.z;
- c.w = c.x;
- c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
- c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
- c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
- c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
- break;
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R8G8B8A8_SINT:
- case VK_FORMAT_R8G8B8A8_SNORM:
- case VK_FORMAT_R8G8B8A8_SRGB:
- c.z = As<Short4>(UnpackHigh(c.x, c.y));
- c.x = As<Short4>(UnpackLow(c.x, c.y));
- c.y = c.x;
- c.w = c.z;
- c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
- c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
- c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
- c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
- // Propagate sign bit
- if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT)
- {
- c.x >>= 8;
- c.y >>= 8;
- c.z >>= 8;
- c.w >>= 8;
- }
- break;
- case VK_FORMAT_R8G8B8A8_UINT:
- c.z = As<Short4>(UnpackHigh(c.x, c.y));
- c.x = As<Short4>(UnpackLow(c.x, c.y));
- c.y = c.x;
- c.w = c.z;
- c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
- c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
- c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
- c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
- break;
- default:
- ASSERT(false);
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ c.z = As<Short4>(UnpackLow(c.x, c.y));
+ c.x = As<Short4>(UnpackHigh(c.x, c.y));
+ c.y = c.z;
+ c.w = c.x;
+ c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
+ c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
+ c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
+ c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
+ break;
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_R8G8B8A8_SNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ c.z = As<Short4>(UnpackHigh(c.x, c.y));
+ c.x = As<Short4>(UnpackLow(c.x, c.y));
+ c.y = c.x;
+ c.w = c.z;
+ c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
+ c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
+ c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
+ c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
+ // Propagate sign bit
+ if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT)
+ {
+ c.x >>= 8;
+ c.y >>= 8;
+ c.z >>= 8;
+ c.w >>= 8;
+ }
+ break;
+ case VK_FORMAT_R8G8B8A8_UINT:
+ c.z = As<Short4>(UnpackHigh(c.x, c.y));
+ c.x = As<Short4>(UnpackLow(c.x, c.y));
+ c.y = c.x;
+ c.w = c.z;
+ c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
+ c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
+ c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
+ c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
+ break;
+ default:
+ ASSERT(false);
}
}
break;
- case 2:
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
+ case 2:
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
- switch(state.textureFormat)
- {
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8G8_SNORM:
- case VK_FORMAT_R8G8_SRGB:
- c.y = (c.x & Short4(0xFF00u));
- c.x = (c.x << 8);
+ switch(state.textureFormat)
+ {
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8G8_SNORM:
+ case VK_FORMAT_R8G8_SRGB:
+ c.y = (c.x & Short4(0xFF00u));
+ c.x = (c.x << 8);
+ break;
+ case VK_FORMAT_R8G8_SINT:
+ c.y = c.x >> 8;
+ c.x = (c.x << 8) >> 8; // Propagate sign bit
+ break;
+ case VK_FORMAT_R8G8_UINT:
+ c.y = As<Short4>(As<UShort4>(c.x) >> 8);
+ c.x &= Short4(0x00FFu);
+ break;
+ default:
+ ASSERT(false);
+ }
break;
- case VK_FORMAT_R8G8_SINT:
- c.y = c.x >> 8;
- c.x = (c.x << 8) >> 8; // Propagate sign bit
- break;
- case VK_FORMAT_R8G8_UINT:
- c.y = As<Short4>(As<UShort4>(c.x) >> 8);
- c.x &= Short4(0x00FFu);
- break;
- default:
- ASSERT(false);
- }
- break;
- case 1:
+ case 1:
{
Int c0 = Int(*Pointer<Byte>(buffer + index[0]));
Int c1 = Int(*Pointer<Byte>(buffer + index[1]));
@@ -1545,9 +1624,9 @@
switch(state.textureFormat)
{
- case VK_FORMAT_R8_SINT:
- case VK_FORMAT_R8_UINT:
- case VK_FORMAT_S8_UINT:
+ case VK_FORMAT_R8_SINT:
+ case VK_FORMAT_R8_UINT:
+ case VK_FORMAT_S8_UINT:
{
Int zero(0);
c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
@@ -1558,58 +1637,58 @@
}
}
break;
- case VK_FORMAT_R8_SNORM:
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8_SRGB:
- // TODO: avoid populating the low bits at all.
- c.x = Unpack(As<Byte4>(c0));
- c.x &= Short4(0xFF00u);
- break;
- default:
- c.x = Unpack(As<Byte4>(c0));
- break;
+ case VK_FORMAT_R8_SNORM:
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R8_SRGB:
+ // TODO: avoid populating the low bits at all.
+ c.x = Unpack(As<Byte4>(c0));
+ c.x &= Short4(0xFF00u);
+ break;
+ default:
+ c.x = Unpack(As<Byte4>(c0));
+ break;
}
}
break;
- default:
- ASSERT(false);
+ default:
+ ASSERT(false);
}
}
else if(has16bitTextureComponents())
{
switch(textureComponentCount())
{
- case 4:
- c.x = Pointer<Short4>(buffer)[index[0]];
- c.y = Pointer<Short4>(buffer)[index[1]];
- c.z = Pointer<Short4>(buffer)[index[2]];
- c.w = Pointer<Short4>(buffer)[index[3]];
- transpose4x4(c.x, c.y, c.z, c.w);
- break;
- case 3:
- c.x = Pointer<Short4>(buffer)[index[0]];
- c.y = Pointer<Short4>(buffer)[index[1]];
- c.z = Pointer<Short4>(buffer)[index[2]];
- c.w = Pointer<Short4>(buffer)[index[3]];
- transpose4x3(c.x, c.y, c.z, c.w);
- break;
- case 2:
- c.x = *Pointer<Short4>(buffer + 4 * index[0]);
- c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
- c.z = *Pointer<Short4>(buffer + 4 * index[2]);
- c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
- c.y = c.x;
- c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
- c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
- break;
- case 1:
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
- c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
- break;
- default:
- ASSERT(false);
+ case 4:
+ c.x = Pointer<Short4>(buffer)[index[0]];
+ c.y = Pointer<Short4>(buffer)[index[1]];
+ c.z = Pointer<Short4>(buffer)[index[2]];
+ c.w = Pointer<Short4>(buffer)[index[3]];
+ transpose4x4(c.x, c.y, c.z, c.w);
+ break;
+ case 3:
+ c.x = Pointer<Short4>(buffer)[index[0]];
+ c.y = Pointer<Short4>(buffer)[index[1]];
+ c.z = Pointer<Short4>(buffer)[index[2]];
+ c.w = Pointer<Short4>(buffer)[index[3]];
+ transpose4x3(c.x, c.y, c.z, c.w);
+ break;
+ case 2:
+ c.x = *Pointer<Short4>(buffer + 4 * index[0]);
+ c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
+ c.z = *Pointer<Short4>(buffer + 4 * index[2]);
+ c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
+ c.y = c.x;
+ c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
+ c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
+ break;
+ case 1:
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+ c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
+ break;
+ default:
+ ASSERT(false);
}
}
else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32)
@@ -1639,12 +1718,13 @@
cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
- c.x = Short4(((cc) & Int4(0x3FF)));
+ c.x = Short4(((cc)&Int4(0x3FF)));
c.y = Short4(((cc >> 10) & Int4(0x3FF)));
c.z = Short4(((cc >> 20) & Int4(0x3FF)));
c.w = Short4(((cc >> 30) & Int4(0x3)));
}
- else ASSERT(false);
+ else
+ ASSERT(false);
if(state.textureFormat.isSRGBformat())
{
@@ -1660,7 +1740,7 @@
return c;
}
-Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function)
+Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function)
{
Vector4s c;
@@ -1670,7 +1750,7 @@
if(isYcbcrFormat())
{
// Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
- Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
+ Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats.
Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
@@ -1715,7 +1795,8 @@
U = (UV & Short4(0x00FFu)) | (UV << 8);
V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
}
- else UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
+ else
+ UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
if(!state.swappedChroma)
{
@@ -1733,19 +1814,19 @@
{
// YCbCr formats are treated as signed 15-bit.
c.x = Cr >> 1;
- c.y = Y >> 1;
+ c.y = Y >> 1;
c.z = Cb >> 1;
}
else
{
// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
// Scale down by 0x0101 to normalize the 8.8 samples, and up by 0x7FFF for signed 15-bit output.
- float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0);
+ float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0);
float uvOffset = static_cast<float>(128 * 0x0101);
- float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101);
+ float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101);
float uvFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 224 * 0x0101 : 255 * 0x0101);
- Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor);
+ Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor);
Float4 u = (Float4(Cb) - Float4(uvOffset)) * Float4(uvFactor);
Float4 v = (Float4(Cr) - Float4(uvOffset)) * Float4(uvFactor);
@@ -1767,20 +1848,20 @@
switch(state.ycbcrModel)
{
- case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
- Kb = 0.0722f;
- Kr = 0.2126f;
- break;
- case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
- Kb = 0.114f;
- Kr = 0.299f;
- break;
- case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
- Kb = 0.0593f;
- Kr = 0.2627f;
- break;
- default:
- UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
+ case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
+ Kb = 0.0722f;
+ Kr = 0.2126f;
+ break;
+ case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
+ Kb = 0.114f;
+ Kr = 0.299f;
+ break;
+ case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
+ Kb = 0.0593f;
+ Kr = 0.2627f;
+ break;
+ default:
+ UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
}
const float Kg = 1.0f - Kr - Kb;
@@ -1790,9 +1871,9 @@
const float Gr = -2 * Kr * (1 - Kr) / Kg;
const float Bb = 2 * (1 - Kb);
- Float4 r = y + Float4(Rr) * v;
+ Float4 r = y + Float4(Rr) * v;
Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
- Float4 b = y + Float4(Bb) * u ;
+ Float4 b = y + Float4(Bb) * u;
c.x = Short4(r);
c.y = Short4(g);
@@ -1808,7 +1889,7 @@
return c;
}
-Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function)
+Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function)
{
Int4 valid;
@@ -1832,111 +1913,111 @@
{
switch(state.textureFormat)
{
- case VK_FORMAT_R16_SFLOAT:
- t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
- t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
- t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
- t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
+ case VK_FORMAT_R16_SFLOAT:
+ t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
+ t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
+ t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
+ t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
- c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
- c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
- c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
- c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
- break;
- case VK_FORMAT_R16G16_SFLOAT:
- t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
- t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
- t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
- t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
+ c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
+ c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
+ c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
+ c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
+ break;
+ case VK_FORMAT_R16G16_SFLOAT:
+ t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
+ t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
+ t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
+ t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
- // FIXME: shuffles
- c.x = As<Float4>(halfToFloatBits(t0));
- c.y = As<Float4>(halfToFloatBits(t1));
- c.z = As<Float4>(halfToFloatBits(t2));
- c.w = As<Float4>(halfToFloatBits(t3));
- transpose4x4(c.x, c.y, c.z, c.w);
- break;
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
- t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
- t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
- t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
+ // FIXME: shuffles
+ c.x = As<Float4>(halfToFloatBits(t0));
+ c.y = As<Float4>(halfToFloatBits(t1));
+ c.z = As<Float4>(halfToFloatBits(t2));
+ c.w = As<Float4>(halfToFloatBits(t3));
+ transpose4x4(c.x, c.y, c.z, c.w);
+ break;
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
+ t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
+ t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
+ t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
- c.x = As<Float4>(halfToFloatBits(t0));
- c.y = As<Float4>(halfToFloatBits(t1));
- c.z = As<Float4>(halfToFloatBits(t2));
- c.w = As<Float4>(halfToFloatBits(t3));
- transpose4x4(c.x, c.y, c.z, c.w);
- break;
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32_UINT:
- case VK_FORMAT_D32_SFLOAT:
- // FIXME: Optimal shuffling?
- c.x.x = *Pointer<Float>(buffer + index[0] * 4);
- c.x.y = *Pointer<Float>(buffer + index[1] * 4);
- c.x.z = *Pointer<Float>(buffer + index[2] * 4);
- c.x.w = *Pointer<Float>(buffer + index[3] * 4);
- break;
- case VK_FORMAT_R32G32_SFLOAT:
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32_UINT:
- // FIXME: Optimal shuffling?
- c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
- c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
- c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
- c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
- c.y = c.x;
- c.x = Float4(c.x.xz, c.z.xz);
- c.y = Float4(c.y.yw, c.z.yw);
- break;
- case VK_FORMAT_R32G32B32_SFLOAT:
- case VK_FORMAT_R32G32B32_SINT:
- case VK_FORMAT_R32G32B32_UINT:
- c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
- c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
- c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
- c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
- transpose4x3(c.x, c.y, c.z, c.w);
- break;
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
- c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
- c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
- c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
- transpose4x4(c.x, c.y, c.z, c.w);
- break;
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
- {
- Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
- t.x = *Pointer<Float>(buffer + index[0] * 4);
- t.y = *Pointer<Float>(buffer + index[1] * 4);
- t.z = *Pointer<Float>(buffer + index[2] * 4);
- t.w = *Pointer<Float>(buffer + index[3] * 4);
- t0 = As<UInt4>(t);
- c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
- c.x = Float4((t0) & UInt4(0x1FF)) * c.w;
- c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w;
- c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w;
- break;
- }
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- {
- Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
- t.x = *Pointer<Float>(buffer + index[0] * 4);
- t.y = *Pointer<Float>(buffer + index[1] * 4);
- t.z = *Pointer<Float>(buffer + index[2] * 4);
- t.w = *Pointer<Float>(buffer + index[3] * 4);
- t0 = As<UInt4>(t);
- c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
- c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
- c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0)));
- break;
- }
- default:
- UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
+ c.x = As<Float4>(halfToFloatBits(t0));
+ c.y = As<Float4>(halfToFloatBits(t1));
+ c.z = As<Float4>(halfToFloatBits(t2));
+ c.w = As<Float4>(halfToFloatBits(t3));
+ transpose4x4(c.x, c.y, c.z, c.w);
+ break;
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32_UINT:
+ case VK_FORMAT_D32_SFLOAT:
+ // FIXME: Optimal shuffling?
+ c.x.x = *Pointer<Float>(buffer + index[0] * 4);
+ c.x.y = *Pointer<Float>(buffer + index[1] * 4);
+ c.x.z = *Pointer<Float>(buffer + index[2] * 4);
+ c.x.w = *Pointer<Float>(buffer + index[3] * 4);
+ break;
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32_UINT:
+ // FIXME: Optimal shuffling?
+ c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
+ c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
+ c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
+ c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
+ c.y = c.x;
+ c.x = Float4(c.x.xz, c.z.xz);
+ c.y = Float4(c.y.yw, c.z.yw);
+ break;
+ case VK_FORMAT_R32G32B32_SFLOAT:
+ case VK_FORMAT_R32G32B32_SINT:
+ case VK_FORMAT_R32G32B32_UINT:
+ c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
+ c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
+ c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
+ c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
+ transpose4x3(c.x, c.y, c.z, c.w);
+ break;
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
+ c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
+ c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
+ c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
+ transpose4x4(c.x, c.y, c.z, c.w);
+ break;
+ case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ {
+ Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
+ t.x = *Pointer<Float>(buffer + index[0] * 4);
+ t.y = *Pointer<Float>(buffer + index[1] * 4);
+ t.z = *Pointer<Float>(buffer + index[2] * 4);
+ t.w = *Pointer<Float>(buffer + index[3] * 4);
+ t0 = As<UInt4>(t);
+ c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
+ c.x = Float4((t0)&UInt4(0x1FF)) * c.w;
+ c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w;
+ c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w;
+ break;
+ }
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ {
+ Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
+ t.x = *Pointer<Float>(buffer + index[0] * 4);
+ t.y = *Pointer<Float>(buffer + index[1] * 4);
+ t.z = *Pointer<Float>(buffer + index[2] * 4);
+ t.w = *Pointer<Float>(buffer + index[3] * 4);
+ t0 = As<UInt4>(t);
+ c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
+ c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
+ c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0)));
+ break;
+ }
+ default:
+ UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat));
}
}
else
@@ -1989,15 +2070,15 @@
switch(state.compareOp)
{
- case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break;
- case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break;
- case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break;
- case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break;
- case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break;
- case VK_COMPARE_OP_NOT_EQUAL: boolean = CmpNEQ(ref, c.x); break;
- case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break;
- case VK_COMPARE_OP_NEVER: boolean = Int4(0); break;
- default: ASSERT(false);
+ case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break;
+ case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break;
+ case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break;
+ case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break;
+ case VK_COMPARE_OP_NOT_EQUAL: boolean = CmpNEQ(ref, c.x); break;
+ case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break;
+ case VK_COMPARE_OP_NEVER: boolean = Int4(0); break;
+ default: ASSERT(false);
}
c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
@@ -2025,29 +2106,29 @@
switch(state.border)
{
- case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
- case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
- borderRGB = Int4(0);
- borderA = Int4(0);
- break;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
- borderRGB = Int4(0);
- borderA = float_one;
- break;
- case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
- borderRGB = Int4(0);
- borderA = Int4(1);
- break;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
- borderRGB = float_one;
- borderA = float_one;
- break;
- case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
- borderRGB = Int4(1);
- borderA = Int4(1);
- break;
- default:
- UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border);
+ case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+ borderRGB = Int4(0);
+ borderA = Int4(0);
+ break;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ borderRGB = Int4(0);
+ borderA = float_one;
+ break;
+ case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+ borderRGB = Int4(0);
+ borderA = Int4(1);
+ break;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+ borderRGB = float_one;
+ borderA = float_one;
+ break;
+ case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+ borderRGB = Int4(1);
+ borderA = Int4(1);
+ break;
+ default:
+ UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border);
}
Vector4f out;
@@ -2076,7 +2157,7 @@
// TODO: Preferred formula is ceil(lod + 0.5) - 1
ilod = RoundInt(lod);
}
- else // MIPMAP_LINEAR
+ else // MIPMAP_LINEAR
{
ilod = Int(lod);
}
@@ -2148,7 +2229,7 @@
return As<Short4>(Int2(convert)) + Short4(0x8000u);
}
- else // Wrap
+ else // Wrap
{
return Short4(Int4(uw * Float4(1 << 16)));
}
@@ -2182,7 +2263,7 @@
{
xyz0 = Min(Max(((function.offset != 0) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
}
- else if(addressingMode == ADDRESSING_LAYER) // Note: Offset does not apply to array layers
+ else if(addressingMode == ADDRESSING_LAYER) // Note: Offset does not apply to array layers
{
// For cube maps, the layer argument is per cube, each of which has 6 layers
if(state.textureType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
@@ -2198,9 +2279,9 @@
}
else
{
- const int halfBits = 0x3EFFFFFF; // Value just under 0.5f
- const int oneBits = 0x3F7FFFFF; // Value just under 1.0f
- const int twoBits = 0x3FFFFFFF; // Value just under 2.0f
+ const int halfBits = 0x3EFFFFFF; // Value just under 0.5f
+ const int oneBits = 0x3F7FFFFF; // Value just under 1.0f
+ const int twoBits = 0x3FFFFFFF; // Value just under 2.0f
bool pointFilter = state.textureFilter == FILTER_POINT ||
state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
@@ -2212,17 +2293,17 @@
{
switch(addressingMode)
{
- case ADDRESSING_CLAMP:
- coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits)));
- break;
- case ADDRESSING_BORDER:
- // Don't map to a valid range here.
- break;
- default:
- // If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be
- // either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER
- UNREACHABLE("addressingMode %d", int(addressingMode));
- break;
+ case ADDRESSING_CLAMP:
+ coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits)));
+ break;
+ case ADDRESSING_BORDER:
+ // Don't map to a valid range here.
+ break;
+ default:
+ // If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be
+ // either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER
+ UNREACHABLE("addressingMode %d", int(addressingMode));
+ break;
}
}
else if(state.textureFilter == FILTER_GATHER && addressingMode == ADDRESSING_MIRROR)
@@ -2244,8 +2325,8 @@
xyz1 = xyz0 + Int4(1);
- xyz0 = (maxXYZ) - mirror(mod(xyz0, Int4(2) * dim) - dim);
- xyz1 = (maxXYZ) - mirror(mod(xyz1, Int4(2) * dim) - dim);
+ xyz0 = (maxXYZ)-mirror(mod(xyz0, Int4(2) * dim) - dim);
+ xyz1 = (maxXYZ)-mirror(mod(xyz1, Int4(2) * dim) - dim);
return;
}
@@ -2255,17 +2336,17 @@
{
switch(addressingMode)
{
- case ADDRESSING_CLAMP:
- case ADDRESSING_SEAMLESS:
- // Linear filtering of cube doesn't require clamping because the coordinates
- // are already in [0, 1] range and numerical imprecision is tolerated.
- if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
- {
- Float4 one = As<Float4>(Int4(oneBits));
- coord = Min(Max(coord, Float4(0.0f)), one);
- }
- break;
- case ADDRESSING_MIRROR:
+ case ADDRESSING_CLAMP:
+ case ADDRESSING_SEAMLESS:
+ // Linear filtering of cube doesn't require clamping because the coordinates
+ // are already in [0, 1] range and numerical imprecision is tolerated.
+ if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
+ {
+ Float4 one = As<Float4>(Int4(oneBits));
+ coord = Min(Max(coord, Float4(0.0f)), one);
+ }
+ break;
+ case ADDRESSING_MIRROR:
{
Float4 half = As<Float4>(Int4(halfBits));
Float4 one = As<Float4>(Int4(oneBits));
@@ -2273,7 +2354,7 @@
coord = one - Abs(two * Frac(coord * half) - one);
}
break;
- case ADDRESSING_MIRRORONCE:
+ case ADDRESSING_MIRRORONCE:
{
Float4 half = As<Float4>(Int4(halfBits));
Float4 one = As<Float4>(Int4(oneBits));
@@ -2281,12 +2362,12 @@
coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
}
break;
- case ADDRESSING_BORDER:
- // Don't map to a valid range here.
- break;
- default: // Wrap
- coord = Frac(coord);
- break;
+ case ADDRESSING_BORDER:
+ // Don't map to a valid range here.
+ break;
+ default: // Wrap
+ coord = Frac(coord);
+ break;
}
}
@@ -2331,7 +2412,7 @@
xyz0 += Int4(1);
}
- xyz1 = xyz0 - filter; // Increment
+ xyz1 = xyz0 - filter; // Increment
if(addressingMode == ADDRESSING_BORDER)
{
@@ -2345,41 +2426,41 @@
{
switch(addressingMode)
{
- case ADDRESSING_SEAMLESS:
- UNREACHABLE("addressingMode %d", int(addressingMode)); // Cube sampling doesn't support offset.
- case ADDRESSING_MIRROR:
- case ADDRESSING_MIRRORONCE:
- // TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE.
- // Fall through to Clamp.
- case ADDRESSING_CLAMP:
- xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
- xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
- break;
- default: // Wrap
- xyz0 = mod(xyz0, dim);
- xyz1 = mod(xyz1, dim);
- break;
+ case ADDRESSING_SEAMLESS:
+ UNREACHABLE("addressingMode %d", int(addressingMode)); // Cube sampling doesn't support offset.
+ case ADDRESSING_MIRROR:
+ case ADDRESSING_MIRRORONCE:
+ // TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE.
+ // Fall through to Clamp.
+ case ADDRESSING_CLAMP:
+ xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
+ xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
+ break;
+ default: // Wrap
+ xyz0 = mod(xyz0, dim);
+ xyz1 = mod(xyz1, dim);
+ break;
}
}
else if(state.textureFilter != FILTER_POINT)
{
switch(addressingMode)
{
- case ADDRESSING_SEAMLESS:
- break;
- case ADDRESSING_MIRROR:
- case ADDRESSING_MIRRORONCE:
- case ADDRESSING_CLAMP:
- xyz0 = Max(xyz0, Int4(0));
- xyz1 = Min(xyz1, maxXYZ);
- break;
- default: // Wrap
+ case ADDRESSING_SEAMLESS:
+ break;
+ case ADDRESSING_MIRROR:
+ case ADDRESSING_MIRRORONCE:
+ case ADDRESSING_CLAMP:
+ xyz0 = Max(xyz0, Int4(0));
+ xyz1 = Min(xyz1, maxXYZ);
+ break;
+ default: // Wrap
{
Int4 under = CmpLT(xyz0, Int4(0));
- xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // TODO: IfThenElse()
+ xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // TODO: IfThenElse()
Int4 nover = CmpLT(xyz1, dim);
- xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz
+ xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz
}
break;
}
@@ -2401,7 +2482,7 @@
{
c = As<UShort4>(c) >> 8;
- Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16));
+ Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants, sRGBtoLinear8_16));
c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
@@ -2483,13 +2564,13 @@
{
switch(state.gatherComponent)
{
- case 0: return state.swizzle.r;
- case 1: return state.swizzle.g;
- case 2: return state.swizzle.b;
- case 3: return state.swizzle.a;
- default:
- UNREACHABLE("Invalid component");
- return VK_COMPONENT_SWIZZLE_R;
+ case 0: return state.swizzle.r;
+ case 1: return state.swizzle.g;
+ case 2: return state.swizzle.b;
+ case 3: return state.swizzle.a;
+ default:
+ UNREACHABLE("Invalid component");
+ return VK_COMPONENT_SWIZZLE_R;
}
}
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index 396a9f6..b1e925f 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -21,7 +21,7 @@
#include "Reactor/Reactor.hpp"
#ifdef None
-#undef None // b/127920555
+# undef None // b/127920555
#endif
namespace sw {
@@ -45,7 +45,9 @@
struct SamplerFunction
{
SamplerFunction(SamplerMethod method, bool offset = false, bool sample = false)
- : method(method), offset(offset), sample(sample)
+ : method(method)
+ , offset(offset)
+ , sample(sample)
{}
operator SamplerMethod() { return method; }
@@ -60,20 +62,20 @@
public:
SamplerCore(Pointer<Byte> &constants, const Sampler &state);
- Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4& sampleId, SamplerFunction function);
+ Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4 &sampleId, SamplerFunction function);
private:
Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
- Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
- Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
- Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
- Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
- Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
- Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
- Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
- Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
- Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
- Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+ Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
+ Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
+ Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+ Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+ Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+ Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
+ Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
+ Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+ Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
+ Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function);
Float log2sqrt(Float lod);
Float log2(Float lod);
void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float4 &dsx, Float4 &dsy, SamplerFunction function);
@@ -81,15 +83,15 @@
void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, SamplerFunction function);
Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
- void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, SamplerFunction function);
- void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, SamplerFunction function);
- Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function);
+ void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function);
+ void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function);
+ Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function);
Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer);
- Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function);
+ Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function);
Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid);
void selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD);
- Short4 address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);
- void address(const Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
+ Short4 address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap);
+ void address(const Float4 &uw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
Int4 computeFilterOffset(Float &lod);
void convertSigned15(Float4 &cf, Short4 &ci);
@@ -120,21 +122,22 @@
#ifdef ENABLE_RR_PRINT
namespace rr {
-template <> struct PrintValue::Ty<sw::SamplerFunction>
+template<>
+struct PrintValue::Ty<sw::SamplerFunction>
{
- static std::string fmt(const sw::SamplerFunction& v)
+ static std::string fmt(const sw::SamplerFunction &v)
{
return std::string("SamplerFunction[") +
- "method: " + std::to_string(v.method) +
- ", offset: " + std::to_string(v.offset) +
- ", sample: " + std::to_string(v.sample) +
- "]";
+ "method: " + std::to_string(v.method) +
+ ", offset: " + std::to_string(v.offset) +
+ ", sample: " + std::to_string(v.sample) +
+ "]";
}
- static std::vector<rr::Value*> val(const sw::SamplerFunction& v) { return {}; }
+ static std::vector<rr::Value *> val(const sw::SamplerFunction &v) { return {}; }
};
} // namespace rr
-#endif // ENABLE_RR_PRINT
+#endif // ENABLE_RR_PRINT
-#endif // sw_SamplerCore_hpp
+#endif // sw_SamplerCore_hpp
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index d3307df..06dea29 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -12,18 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include <Device/Vertex.hpp>
#include "SetupRoutine.hpp"
+#include <Device/Vertex.hpp>
#include "Constants.hpp"
-#include "Device/Primitive.hpp"
#include "Device/Polygon.hpp"
+#include "Device/Primitive.hpp"
#include "Device/Renderer.hpp"
#include "Reactor/Reactor.hpp"
namespace sw {
-SetupRoutine::SetupRoutine(const SetupProcessor::State &state) : state(state)
+SetupRoutine::SetupRoutine(const SetupProcessor::State &state)
+ : state(state)
{
}
@@ -40,15 +41,15 @@
Pointer<Byte> polygon(function.Arg<2>());
Pointer<Byte> data(function.Arg<3>());
- Pointer<Byte> constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
+ Pointer<Byte> constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData, constants));
const bool point = state.isDrawPoint;
const bool line = state.isDrawLine;
const bool triangle = state.isDrawTriangle;
- const int V0 = OFFSET(Triangle,v0);
- const int V1 = (triangle || line) ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0);
- const int V2 = triangle ? OFFSET(Triangle,v2) : (line ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0));
+ const int V0 = OFFSET(Triangle, v0);
+ const int V1 = (triangle || line) ? OFFSET(Triangle, v1) : OFFSET(Triangle, v0);
+ const int V2 = triangle ? OFFSET(Triangle, v2) : (line ? OFFSET(Triangle, v1) : OFFSET(Triangle, v0));
Pointer<Byte> v0 = tri + V0;
Pointer<Byte> v1 = tri + V1;
@@ -57,15 +58,15 @@
Array<Int> X(16);
Array<Int> Y(16);
- X[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x));
- X[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x));
- X[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x));
+ X[0] = *Pointer<Int>(v0 + OFFSET(Vertex, projected.x));
+ X[1] = *Pointer<Int>(v1 + OFFSET(Vertex, projected.x));
+ X[2] = *Pointer<Int>(v2 + OFFSET(Vertex, projected.x));
- Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y));
- Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y));
- Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y));
+ Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex, projected.y));
+ Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex, projected.y));
+ Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex, projected.y));
- Int d = 1; // Winding direction
+ Int d = 1; // Winding direction
// Culling
if(triangle)
@@ -78,7 +79,7 @@
Float y1 = Float(Y[1]);
Float y2 = Float(Y[2]);
- Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2; // Area
+ Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2; // Area
If(A == 0.0f)
{
@@ -106,40 +107,40 @@
If(frontFacing)
{
- *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
}
Else
{
- *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
- *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
}
}
else
{
- *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
}
- Int n = *Pointer<Int>(polygon + OFFSET(Polygon,n));
- Int m = *Pointer<Int>(polygon + OFFSET(Polygon,i));
+ Int n = *Pointer<Int>(polygon + OFFSET(Polygon, n));
+ Int m = *Pointer<Int>(polygon + OFFSET(Polygon, i));
- If(m != 0 || Bool(!triangle)) // Clipped triangle; reproject
+ If(m != 0 || Bool(!triangle)) // Clipped triangle; reproject
{
- Pointer<Byte> V = polygon + OFFSET(Polygon,P) + m * sizeof(void*) * 16;
+ Pointer<Byte> V = polygon + OFFSET(Polygon, P) + m * sizeof(void *) * 16;
Int i = 0;
Do
{
- Pointer<Float4> p = *Pointer<Pointer<Float4> >(V + i * sizeof(void*));
+ Pointer<Float4> p = *Pointer<Pointer<Float4> >(V + i * sizeof(void *));
Float4 v = *Pointer<Float4>(p, 16);
Float w = v.w;
Float rhw = IfThenElse(w != 0.0f, 1.0f / w, Float(1.0f));
- X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,WxF)));
- Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,HxF)));
+ X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData, X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData, WxF)));
+ Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData, Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData, HxF)));
i++;
}
@@ -176,8 +177,8 @@
yMax = (yMax + subPixM) >> subPixB;
}
- yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
- yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
+ yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData, scissorY0)));
+ yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData, scissorY1)));
// If yMin and yMax are initially negative, the scissor clamping above will typically result
// in yMin == 0 and yMax unchanged. We bail as we don't need to rasterize this primitive, and
@@ -201,16 +202,16 @@
if(state.multiSample > 1)
{
- Xq[i] = Xq[i] + *Pointer<Int>(constants + OFFSET(Constants,Xf) + q * sizeof(int));
- Yq[i] = Yq[i] + *Pointer<Int>(constants + OFFSET(Constants,Yf) + q * sizeof(int));
+ Xq[i] = Xq[i] + *Pointer<Int>(constants + OFFSET(Constants, Xf) + q * sizeof(int));
+ Yq[i] = Yq[i] + *Pointer<Int>(constants + OFFSET(Constants, Yf) + q * sizeof(int));
}
i++;
}
Until(i >= n);
- Pointer<Byte> leftEdge = Pointer<Byte>(primitive + OFFSET(Primitive,outline->left)) + q * sizeof(Primitive);
- Pointer<Byte> rightEdge = Pointer<Byte>(primitive + OFFSET(Primitive,outline->right)) + q * sizeof(Primitive);
+ Pointer<Byte> leftEdge = Pointer<Byte>(primitive + OFFSET(Primitive, outline->left)) + q * sizeof(Primitive);
+ Pointer<Byte> rightEdge = Pointer<Byte>(primitive + OFFSET(Primitive, outline->right)) + q * sizeof(Primitive);
if(state.multiSample > 1)
{
@@ -265,8 +266,8 @@
}
}
- *Pointer<Int>(primitive + OFFSET(Primitive,yMin)) = yMin;
- *Pointer<Int>(primitive + OFFSET(Primitive,yMax)) = yMax;
+ *Pointer<Int>(primitive + OFFSET(Primitive, yMin)) = yMin;
+ *Pointer<Int>(primitive + OFFSET(Primitive, yMax)) = yMax;
// Sort by minimum y
if(triangle)
@@ -305,15 +306,15 @@
w012.z = w2;
w012.w = 1;
- Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.w));
+ Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex, projected.w));
- Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x));
- Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x));
- Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x));
+ Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex, projected.x));
+ Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex, projected.x));
+ Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex, projected.x));
- Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y));
- Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y));
- Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y));
+ Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex, projected.y));
+ Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex, projected.y));
+ Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex, projected.y));
if(point)
{
@@ -347,8 +348,8 @@
Float4 xQuad = Float4(0, 1, 0, 1) - Float4(dx);
Float4 yQuad = Float4(0, 0, 1, 1) - Float4(dy);
- *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16) = xQuad;
- *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16) = yQuad;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16) = xQuad;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16) = yQuad;
Float4 M[3];
@@ -365,18 +366,18 @@
M[0].x = (y1 * w2 - y2 * w1) * D;
M[0].y = (x2 * w1 - x1 * w2) * D;
- // M[0].z = rhw0;
- // M[0].w = 0;
+ // M[0].z = rhw0;
+ // M[0].w = 0;
M[1].x = y2 * A;
M[1].y = -x2 * A;
- // M[1].z = 0;
- // M[1].w = 0;
+ // M[1].z = 0;
+ // M[1].w = 0;
M[2].x = -y1 * A;
M[2].y = x1 * A;
- // M[2].z = 0;
- // M[2].w = 0;
+ // M[2].z = 0;
+ // M[2].w = 0;
}
if(state.interpolateW)
@@ -387,16 +388,16 @@
Float4 B = ABC.y;
Float4 C = ABC.z;
- *Pointer<Float4>(primitive + OFFSET(Primitive,w.A), 16) = A;
- *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16) = B;
- *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) = C;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, w.A), 16) = A;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, w.B), 16) = B;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, w.C), 16) = C;
}
if(state.interpolateZ)
{
- Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.z));
- Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex,projected.z));
- Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex,projected.z));
+ Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex, projected.z));
+ Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex, projected.z));
+ Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex, projected.z));
z1 -= z0;
z2 -= z0;
@@ -412,7 +413,7 @@
Float x2 = Float(X2) * (1.0f / subPixF);
Float y2 = Float(Y2) * (1.0f / subPixF);
- Float D = *Pointer<Float>(data + OFFSET(DrawData,depthRange)) / (x1 * y2 - x2 * y1);
+ Float D = *Pointer<Float>(data + OFFSET(DrawData, depthRange)) / (x1 * y2 - x2 * y1);
Float a = (y2 * z1 - y1 * z2) * D;
Float b = (x1 * z2 - x2 * z1) * D;
@@ -426,22 +427,22 @@
B = Float4(0, 0, 0, 0);
}
- *Pointer<Float4>(primitive + OFFSET(Primitive,z.A), 16) = A;
- *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16) = B;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, z.A), 16) = A;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, z.B), 16) = B;
Float c = z0;
if(state.applySlopeDepthBias)
{
Float bias = Max(Abs(Float(A.x)), Abs(Float(B.x)));
- bias *= *Pointer<Float>(data + OFFSET(DrawData,slopeDepthBias));
+ bias *= *Pointer<Float>(data + OFFSET(DrawData, slopeDepthBias));
c += bias;
}
- C = Float4(c * *Pointer<Float>(data + OFFSET(DrawData,depthRange)) + *Pointer<Float>(data + OFFSET(DrawData,depthNear)));
+ C = Float4(c * *Pointer<Float>(data + OFFSET(DrawData, depthRange)) + *Pointer<Float>(data + OFFSET(DrawData, depthNear)));
- *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) = C;
+ *Pointer<Float4>(primitive + OFFSET(Primitive, z.C), 16) = C;
}
for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
@@ -449,27 +450,27 @@
if(state.gradient[interpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
- OFFSET(Vertex, v[interpolant]),
- OFFSET(Primitive, V[interpolant]),
- state.gradient[interpolant].Flat,
- !state.gradient[interpolant].NoPerspective);
+ OFFSET(Vertex, v[interpolant]),
+ OFFSET(Primitive, V[interpolant]),
+ state.gradient[interpolant].Flat,
+ !state.gradient[interpolant].NoPerspective);
}
}
for(unsigned int i = 0; i < state.numClipDistances; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
- OFFSET(Vertex, clipDistance[i]),
- OFFSET(Primitive, clipDistance[i]),
- false, true);
+ OFFSET(Vertex, clipDistance[i]),
+ OFFSET(Primitive, clipDistance[i]),
+ false, true);
}
for(unsigned int i = 0; i < state.numCullDistances; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
- OFFSET(Vertex, cullDistance[i]),
- OFFSET(Primitive, cullDistance[i]),
- false, true);
+ OFFSET(Vertex, cullDistance[i]),
+ OFFSET(Primitive, cullDistance[i]),
+ false, true);
}
Return(1);
@@ -509,7 +510,7 @@
}
else
{
- int leadingVertex = OFFSET(Triangle,v0);
+ int leadingVertex = OFFSET(Triangle, v0);
Float C = *Pointer<Float>(triangle + leadingVertex + attribute);
*Pointer<Float4>(primitive + planeEquation + 0, 16) = Float4(0, 0, 0, 0);
@@ -532,16 +533,16 @@
constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS;
constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK;
- Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
- Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
+ Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData, scissorY0)));
+ Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData, scissorY1)));
If(y1 < y2)
{
- Int xMin = *Pointer<Int>(data + OFFSET(DrawData,scissorX0));
- Int xMax = *Pointer<Int>(data + OFFSET(DrawData,scissorX1));
+ Int xMin = *Pointer<Int>(data + OFFSET(DrawData, scissorX0));
+ Int xMax = *Pointer<Int>(data + OFFSET(DrawData, scissorX1));
- Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left);
- Pointer<Byte> rightEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right);
+ Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->left);
+ Pointer<Byte> rightEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->right);
Pointer<Byte> edge = IfThenElse(swap, rightEdge, leftEdge);
// Deltas
@@ -552,19 +553,19 @@
Int FDY12 = DY12 << subPixB;
Int X = DX12 * ((y1 << subPixB) - Y1) + (X1 & subPixM) * DY12;
- Int x = (X1 >> subPixB) + X / FDY12; // Edge
- Int d = X % FDY12; // Error-term
- Int ceil = -d >> 31; // Ceiling division: remainder <= 0
+ Int x = (X1 >> subPixB) + X / FDY12; // Edge
+ Int d = X % FDY12; // Error-term
+ Int ceil = -d >> 31; // Ceiling division: remainder <= 0
x -= ceil;
d -= ceil & FDY12;
- Int Q = FDX12 / FDY12; // Edge-step
- Int R = FDX12 % FDY12; // Error-step
- Int floor = R >> 31; // Flooring division: remainder >= 0
+ Int Q = FDX12 / FDY12; // Edge-step
+ Int R = FDX12 % FDY12; // Error-step
+ Int floor = R >> 31; // Flooring division: remainder >= 0
Q += floor;
R += floor & FDY12;
- Int D = FDY12; // Error-overflow
+ Int D = FDY12; // Error-overflow
Int y = y1;
Do
@@ -588,7 +589,7 @@
void SetupRoutine::conditionalRotate1(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2)
{
- #if 0 // Rely on LLVM optimization
+#if 0 // Rely on LLVM optimization
If(condition)
{
Pointer<Byte> vX;
@@ -598,17 +599,17 @@
v1 = v2;
v2 = vX;
}
- #else
- Pointer<Byte> vX = v0;
- v0 = IfThenElse(condition, v1, v0);
- v1 = IfThenElse(condition, v2, v1);
- v2 = IfThenElse(condition, vX, v2);
- #endif
+#else
+ Pointer<Byte> vX = v0;
+ v0 = IfThenElse(condition, v1, v0);
+ v1 = IfThenElse(condition, v2, v1);
+ v2 = IfThenElse(condition, vX, v2);
+#endif
}
void SetupRoutine::conditionalRotate2(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2)
{
- #if 0 // Rely on LLVM optimization
+#if 0 // Rely on LLVM optimization
If(condition)
{
Pointer<Byte> vX;
@@ -618,12 +619,12 @@
v1 = v0;
v0 = vX;
}
- #else
- Pointer<Byte> vX = v2;
- v2 = IfThenElse(condition, v1, v2);
- v1 = IfThenElse(condition, v0, v1);
- v0 = IfThenElse(condition, vX, v0);
- #endif
+#else
+ Pointer<Byte> vX = v2;
+ v2 = IfThenElse(condition, v1, v2);
+ v1 = IfThenElse(condition, v0, v1);
+ v0 = IfThenElse(condition, vX, v0);
+#endif
}
SetupFunction::RoutineType SetupRoutine::getRoutine()
diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp
index 59fe55a..b8b399b 100644
--- a/src/Pipeline/SetupRoutine.hpp
+++ b/src/Pipeline/SetupRoutine.hpp
@@ -45,4 +45,4 @@
} // namespace sw
-#endif // sw_SetupRoutine_hpp
+#endif // sw_SetupRoutine_hpp
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 65b2084..534ed8d 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -55,10 +55,10 @@
{
switch(i)
{
- case 0: return x;
- case 1: return y;
- case 2: return z;
- case 3: return w;
+ case 0: return x;
+ case 1: return y;
+ case 2: return z;
+ case 3: return w;
}
return x;
@@ -98,10 +98,10 @@
{
switch(i)
{
- case 0: return x;
- case 1: return y;
- case 2: return z;
- case 3: return w;
+ case 0: return x;
+ case 1: return y;
+ case 2: return z;
+ case 3: return w;
}
return x;
@@ -116,20 +116,20 @@
// the IEEE-754 floating-point number. Clamp to prevent overflow
// past the representation of infinity.
Float4 x0 = x;
- x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f
- x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f
+ x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f
+ x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f
Int4 i = RoundInt(x0 - Float4(0.5f));
- Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent.
+ Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent.
// For the fractional part use a polynomial
// which approximates 2^f in the 0 to 1 range.
Float4 f = x0 - Float4(i);
- Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f
- ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f
- ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f
- ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f
- ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f
+ Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f
+ ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f
+ ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f
+ ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f
+ ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f
ff = ff * f + Float4(1.0f);
return ii * ff;
@@ -147,7 +147,7 @@
x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
x1 = As<Float4>(As<UInt4>(x1) >> 8);
x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f)));
- x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
+ x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
@@ -163,13 +163,13 @@
Float4 exponential(RValue<Float4> x, bool pp)
{
// FIXME: Propagate the constant
- return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2)
+ return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2)
}
Float4 logarithm(RValue<Float4> x, bool pp)
{
// FIXME: Propagate the constant
- return Float4(6.93147181e-1f) * logarithm2(x, pp); // ln(2)
+ return Float4(6.93147181e-1f) * logarithm2(x, pp); // ln(2)
}
Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp)
@@ -191,7 +191,7 @@
if(finite)
{
int big = 0x7F7FFFFF;
- rcp = Min(rcp, Float4((float&)big));
+ rcp = Min(rcp, Float4((float &)big));
}
return rcp;
@@ -234,8 +234,8 @@
Float4 sine_pi(RValue<Float4> x, bool pp)
{
- const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2
- const Float4 B = Float4(1.27323954e+0f); // 4/pi
+ const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2
+ const Float4 B = Float4(1.27323954e+0f); // 4/pi
const Float4 C = Float4(7.75160950e-1f);
const Float4 D = Float4(2.24839049e-1f);
@@ -265,7 +265,7 @@
Float4 sine(RValue<Float4> x, bool pp)
{
// Reduce to [-0.5, 0.5] range
- Float4 y = x * Float4(1.59154943e-1f); // 1/2pi
+ Float4 y = x * Float4(1.59154943e-1f); // 1/2pi
y = y - Round(y);
if(!pp)
@@ -320,10 +320,10 @@
Float4 arcsin(RValue<Float4> x, bool pp)
{
- if(false) // Simpler implementation fails even lowp precision tests
+ if(false) // Simpler implementation fails even lowp precision tests
{
// x*(pi/2-sqrt(1-x*x)*pi/5)
- return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f));
+ return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x * x) * Float4(6.28318531e-1f));
}
else
{
@@ -335,7 +335,7 @@
const Float4 a3(-0.0187293f);
Float4 absx = Abs(x);
return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
- (As<Int4>(x) & Int4(0x80000000)));
+ (As<Int4>(x) & Int4(0x80000000)));
}
}
@@ -366,20 +366,20 @@
{
Float4 absx = Abs(x);
Int4 O = CmpNLT(absx, Float4(1.0f));
- Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select
+ Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select
const Float4 half_pi(1.57079632f);
Float4 theta = arctan_01(y, pp);
- return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^ // FIXME: Vector select
- (As<Int4>(x) & Int4(0x80000000)));
+ return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^ // FIXME: Vector select
+ (As<Int4>(x) & Int4(0x80000000)));
}
Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp)
{
- const Float4 pi(3.14159265f); // pi
- const Float4 minus_pi(-3.14159265f); // -pi
- const Float4 half_pi(1.57079632f); // pi/2
- const Float4 quarter_pi(7.85398163e-1f); // pi/4
+ const Float4 pi(3.14159265f); // pi
+ const Float4 minus_pi(-3.14159265f); // -pi
+ const Float4 half_pi(1.57079632f); // pi/2
+ const Float4 quarter_pi(7.85398163e-1f); // pi/4
// Rotate to upper semicircle when in lower semicircle
Int4 S = CmpLT(y, Float4(0.0f));
@@ -390,24 +390,24 @@
// Rotate to right quadrant when in left quadrant
Int4 Q = CmpLT(x0, Float4(0.0f));
theta += As<Float4>(Q & As<Int4>(half_pi));
- Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select
- Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select
+ Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select
+ Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select
// Mirror to first octant when in second octant
Int4 O = CmpNLT(y1, x1);
- Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1))); // FIXME: Vector select
- Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select
+ Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1))); // FIXME: Vector select
+ Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select
// Approximation of atan in [0..1]
Int4 zero_x = CmpEQ(x2, Float4(0.0f));
- Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
+ Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
Float4 atan2_theta = arctan_01(y2 / x2, pp);
- theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select
+ theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select
(inf_y & As<Int4>(quarter_pi)));
// Recover loss of precision for tiny theta angles
- Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
- return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta))); // FIXME: Vector select
+ Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
+ return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta))); // FIXME: Vector select
}
Float4 sineh(RValue<Float4> x, bool pp)
@@ -539,10 +539,10 @@
{
switch(N)
{
- case 1: transpose4x1(row0, row1, row2, row3); break;
- case 2: transpose4x2(row0, row1, row2, row3); break;
- case 3: transpose4x3(row0, row1, row2, row3); break;
- case 4: transpose4x4(row0, row1, row2, row3); break;
+ case 1: transpose4x1(row0, row1, row2, row3); break;
+ case 2: transpose4x2(row0, row1, row2, row3); break;
+ case 3: transpose4x3(row0, row1, row2, row3); break;
+ case 4: transpose4x4(row0, row1, row2, row3); break;
}
}
@@ -551,15 +551,15 @@
auto magic = SIMD::UInt(126 << 23);
auto sign16 = halfBits & SIMD::UInt(0x8000);
- auto man16 = halfBits & SIMD::UInt(0x03FF);
- auto exp16 = halfBits & SIMD::UInt(0x7C00);
+ auto man16 = halfBits & SIMD::UInt(0x03FF);
+ auto exp16 = halfBits & SIMD::UInt(0x7C00);
auto isDnormOrZero = CmpEQ(exp16, SIMD::UInt(0));
auto isInfOrNaN = CmpEQ(exp16, SIMD::UInt(0x7C00));
auto sign32 = sign16 << 16;
- auto man32 = man16 << 13;
- auto exp32 = (exp16 + SIMD::UInt(0x1C000)) << 13;
+ auto man32 = man16 << 13;
+ auto exp32 = (exp16 + SIMD::UInt(0x1C000)) << 13;
auto norm32 = (man32 | exp32) | (isInfOrNaN & SIMD::UInt(0x7F800000));
auto denorm32 = As<SIMD::UInt>(As<SIMD::Float>(magic + man16) - As<SIMD::Float>(magic));
@@ -584,7 +584,10 @@
// Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
// instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
- As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
+ As<SIMD::Float>(SIMD::UInt(c_clamp))))) -
+ SIMD::UInt(mask_round)) >>
+ 13) &
+ b_isnormal) |
((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) &
((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) | SIMD::UInt(c_infty_as_fp16)));
@@ -610,7 +613,7 @@
{
SIMD::UInt halfBits = floatToHalfBits(As<SIMD::UInt>(value), true) &
SIMD::UInt(0x7FF00000, 0x7FF00000, 0x7FE00000, 0);
- return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1);
+ return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1);
}
rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
@@ -670,9 +673,9 @@
// Performs a fused-multiply add, returning a * b + c.
rr::RValue<sw::SIMD::Float> FMA(
- rr::RValue<sw::SIMD::Float> const &a,
- rr::RValue<sw::SIMD::Float> const &b,
- rr::RValue<sw::SIMD::Float> const &c)
+ rr::RValue<sw::SIMD::Float> const &a,
+ rr::RValue<sw::SIMD::Float> const &b,
+ rr::RValue<sw::SIMD::Float> const &c)
{
return a * b + c;
}
@@ -694,12 +697,12 @@
auto xIsNan = IsNan(x);
auto yIsNan = IsNan(y);
return As<sw::SIMD::Float>(
- // If neither are NaN, return min
- ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
- // If one operand is a NaN, the other operand is the result
- // If both operands are NaN, the result is a NaN.
- ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
- (( xIsNan ) & As<sw::SIMD::Int>(y)));
+ // If neither are NaN, return min
+ ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
+ // If one operand is a NaN, the other operand is the result
+ // If both operands are NaN, the result is a NaN.
+ ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
+ ((xIsNan)&As<sw::SIMD::Int>(y)));
}
// Returns y if y > x; otherwise result is x.
@@ -711,37 +714,37 @@
auto xIsNan = IsNan(x);
auto yIsNan = IsNan(y);
return As<sw::SIMD::Float>(
- // If neither are NaN, return max
- ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
- // If one operand is a NaN, the other operand is the result
- // If both operands are NaN, the result is a NaN.
- ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
- (( xIsNan ) & As<sw::SIMD::Int>(y)));
+ // If neither are NaN, return max
+ ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
+ // If one operand is a NaN, the other operand is the result
+ // If both operands are NaN, the result is a NaN.
+ ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
+ ((xIsNan)&As<sw::SIMD::Int>(y)));
}
// Returns the determinant of a 2x2 matrix.
rr::RValue<sw::SIMD::Float> Determinant(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
- rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+ rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
{
- return a*d - b*c;
+ return a * d - b * c;
}
// Returns the determinant of a 3x3 matrix.
rr::RValue<sw::SIMD::Float> Determinant(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
- rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
- rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+ rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+ rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
{
- return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h;
+ return a * e * i + b * f * g + c * d * h - c * e * g - b * d * i - a * f * h;
}
// Returns the determinant of a 4x4 matrix.
rr::RValue<sw::SIMD::Float> Determinant(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
- rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
- rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
- rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+ rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+ rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+ rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
{
return a * Determinant(f, g, h,
j, k, l,
@@ -759,108 +762,130 @@
// Returns the inverse of a 2x2 matrix.
std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
- rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+ rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
{
auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d);
- return {{s*d, -s*b, -s*c, s*a}};
+ return { { s * d, -s * b, -s * c, s * a } };
}
// Returns the inverse of a 3x3 matrix.
std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
- rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
- rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+ rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+ rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
{
auto s = sw::SIMD::Float(1.0f) / Determinant(
- a, b, c,
- d, e, f,
- g, h, i); // TODO: duplicate arithmetic calculating the det and below.
+ a, b, c,
+ d, e, f,
+ g, h, i); // TODO: duplicate arithmetic calculating the det and below.
- return {{
- s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e),
- s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f),
- s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d),
- }};
+ return { {
+ s * (e * i - f * h),
+ s * (c * h - b * i),
+ s * (b * f - c * e),
+ s * (f * g - d * i),
+ s * (a * i - c * g),
+ s * (c * d - a * f),
+ s * (d * h - e * g),
+ s * (b * g - a * h),
+ s * (a * e - b * d),
+ } };
}
// Returns the inverse of a 4x4 matrix.
std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
- rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
- rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
- rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+ rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+ rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+ rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
{
auto s = sw::SIMD::Float(1.0f) / Determinant(
- a, b, c, d,
- e, f, g, h,
- i, j, k, l,
- m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.
+ a, b, c, d,
+ e, f, g, h,
+ i, j, k, l,
+ m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.
- auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n;
- auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n;
- auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j;
- auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m;
- auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i;
- auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i;
+ auto kplo = k * p - l * o, jpln = j * p - l * n, jokn = j * o - k * n;
+ auto gpho = g * p - h * o, fphn = f * p - h * n, fogn = f * o - g * n;
+ auto glhk = g * l - h * k, flhj = f * l - h * j, fkgj = f * k - g * j;
+ auto iplm = i * p - l * m, iokm = i * o - k * m, ephm = e * p - h * m;
+ auto eogm = e * o - g * m, elhi = e * l - h * i, ekgi = e * k - g * i;
+ auto injm = i * n - j * m, enfm = e * n - f * m, ejfi = e * j - f * i;
- return {{
- s * ( f * kplo - g * jpln + h * jokn),
+ return { {
+ s * (f * kplo - g * jpln + h * jokn),
s * (-b * kplo + c * jpln - d * jokn),
- s * ( b * gpho - c * fphn + d * fogn),
+ s * (b * gpho - c * fphn + d * fogn),
s * (-b * glhk + c * flhj - d * fkgj),
s * (-e * kplo + g * iplm - h * iokm),
- s * ( a * kplo - c * iplm + d * iokm),
+ s * (a * kplo - c * iplm + d * iokm),
s * (-a * gpho + c * ephm - d * eogm),
- s * ( a * glhk - c * elhi + d * ekgi),
+ s * (a * glhk - c * elhi + d * ekgi),
- s * ( e * jpln - f * iplm + h * injm),
+ s * (e * jpln - f * iplm + h * injm),
s * (-a * jpln + b * iplm - d * injm),
- s * ( a * fphn - b * ephm + d * enfm),
+ s * (a * fphn - b * ephm + d * enfm),
s * (-a * flhj + b * elhi - d * ejfi),
s * (-e * jokn + f * iokm - g * injm),
- s * ( a * jokn - b * iokm + c * injm),
+ s * (a * jokn - b * iokm + c * injm),
s * (-a * fogn + b * eogm - c * enfm),
- s * ( a * fkgj - b * ekgi + c * ejfi),
- }};
+ s * (a * fkgj - b * ekgi + c * ejfi),
+ } };
}
namespace SIMD {
Pointer::Pointer(rr::Pointer<Byte> base, rr::Int limit)
- : base(base),
- dynamicLimit(limit), staticLimit(0),
- dynamicOffsets(0), staticOffsets{},
- hasDynamicLimit(true), hasDynamicOffsets(false) {}
+ : base(base)
+ , dynamicLimit(limit)
+ , staticLimit(0)
+ , dynamicOffsets(0)
+ , staticOffsets{}
+ , hasDynamicLimit(true)
+ , hasDynamicOffsets(false)
+{}
Pointer::Pointer(rr::Pointer<Byte> base, unsigned int limit)
- : base(base),
- dynamicLimit(0), staticLimit(limit),
- dynamicOffsets(0), staticOffsets{},
- hasDynamicLimit(false), hasDynamicOffsets(false) {}
+ : base(base)
+ , dynamicLimit(0)
+ , staticLimit(limit)
+ , dynamicOffsets(0)
+ , staticOffsets{}
+ , hasDynamicLimit(false)
+ , hasDynamicOffsets(false)
+{}
Pointer::Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
- : base(base),
- dynamicLimit(limit), staticLimit(0),
- dynamicOffsets(offset), staticOffsets{},
- hasDynamicLimit(true), hasDynamicOffsets(true) {}
+ : base(base)
+ , dynamicLimit(limit)
+ , staticLimit(0)
+ , dynamicOffsets(offset)
+ , staticOffsets{}
+ , hasDynamicLimit(true)
+ , hasDynamicOffsets(true)
+{}
Pointer::Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
- : base(base),
- dynamicLimit(0), staticLimit(limit),
- dynamicOffsets(offset), staticOffsets{},
- hasDynamicLimit(false), hasDynamicOffsets(true) {}
+ : base(base)
+ , dynamicLimit(0)
+ , staticLimit(limit)
+ , dynamicOffsets(offset)
+ , staticOffsets{}
+ , hasDynamicLimit(false)
+ , hasDynamicOffsets(true)
+{}
-Pointer& Pointer::operator += (Int i)
+Pointer &Pointer::operator+=(Int i)
{
dynamicOffsets += i;
hasDynamicOffsets = true;
return *this;
}
-Pointer& Pointer::operator *= (Int i)
+Pointer &Pointer::operator*=(Int i)
{
dynamicOffsets = offsets() * i;
staticOffsets = {};
@@ -868,16 +893,26 @@
return *this;
}
-Pointer Pointer::operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; }
-Pointer Pointer::operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; }
+Pointer Pointer::operator+(SIMD::Int i)
+{
+ Pointer p = *this;
+ p += i;
+ return p;
+}
+Pointer Pointer::operator*(SIMD::Int i)
+{
+ Pointer p = *this;
+ p *= i;
+ return p;
+}
-Pointer& Pointer::operator += (int i)
+Pointer &Pointer::operator+=(int i)
{
for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
return *this;
}
-Pointer& Pointer::operator *= (int i)
+Pointer &Pointer::operator*=(int i)
{
for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; }
if(hasDynamicOffsets)
@@ -887,8 +922,18 @@
return *this;
}
-Pointer Pointer::operator + (int i) { Pointer p = *this; p += i; return p; }
-Pointer Pointer::operator * (int i) { Pointer p = *this; p *= i; return p; }
+Pointer Pointer::operator+(int i)
+{
+ Pointer p = *this;
+ p += i;
+ return p;
+}
+Pointer Pointer::operator*(int i)
+{
+ Pointer p = *this;
+ p *= i;
+ return p;
+}
SIMD::Int Pointer::offsets() const
{
@@ -910,10 +955,10 @@
// Common fast paths.
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
return SIMD::Int(
- (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
- (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
- (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
- (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
+ (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
+ (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
+ (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
+ (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
}
return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
@@ -932,14 +977,14 @@
{
switch(robustness)
{
- case OutOfBoundsBehavior::UndefinedBehavior:
- // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
- // but since it can't know in advance which branches are taken this must be true even for inactives lanes.
- return true;
- case OutOfBoundsBehavior::Nullify:
- case OutOfBoundsBehavior::RobustBufferAccess:
- case OutOfBoundsBehavior::UndefinedValue:
- return false;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
+ // but since it can't know in advance which branches are taken this must be true even for inactives lanes.
+ return true;
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess:
+ case OutOfBoundsBehavior::UndefinedValue:
+ return false;
}
}
}
@@ -968,7 +1013,7 @@
{
auto o = offsets();
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
- return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 2*step, 3*step, 0))) == 0;
+ return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1 * step, 2 * step, 3 * step, 0))) == 0;
}
return hasStaticSequentialOffsets(step);
}
@@ -983,7 +1028,7 @@
}
for(int i = 1; i < SIMD::Width; i++)
{
- if(staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; }
+ if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i]) { return false; }
}
return true;
}
@@ -1010,7 +1055,7 @@
}
for(int i = 1; i < SIMD::Width; i++)
{
- if(staticOffsets[i-1] != staticOffsets[i]) { return false; }
+ if(staticOffsets[i - 1] != staticOffsets[i]) { return false; }
}
return true;
}
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index 4026d27..a911be2 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -19,9 +19,9 @@
#include "Reactor/Reactor.hpp"
#include "Vulkan/VkDebug.hpp"
-#include <atomic> // std::memory_order
#include <array>
-#include <utility> // std::pair
+#include <atomic> // std::memory_order
+#include <utility> // std::pair
namespace sw {
@@ -87,17 +87,17 @@
Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset);
Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset);
- Pointer& operator += (Int i);
- Pointer& operator *= (Int i);
+ Pointer &operator+=(Int i);
+ Pointer &operator*=(Int i);
- Pointer operator + (SIMD::Int i);
- Pointer operator * (SIMD::Int i);
+ Pointer operator+(SIMD::Int i);
+ Pointer operator*(SIMD::Int i);
- Pointer& operator += (int i);
- Pointer& operator *= (int i);
+ Pointer &operator+=(int i);
+ Pointer &operator*=(int i);
- Pointer operator + (int i);
- Pointer operator * (int i);
+ Pointer operator+(int i);
+ Pointer operator*(int i);
SIMD::Int offsets() const;
@@ -135,23 +135,37 @@
rr::Pointer<rr::Byte> base;
// Upper (non-inclusive) limit for offsets from base.
- rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
+ rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
unsigned int staticLimit;
// Per lane offsets from base.
- SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
+ SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
std::array<int32_t, SIMD::Width> staticOffsets;
bool hasDynamicLimit; // True if dynamicLimit is non-zero.
bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero.
};
-template <typename T> struct Element {};
-template <> struct Element<Float> { using type = rr::Float; };
-template <> struct Element<Int> { using type = rr::Int; };
-template <> struct Element<UInt> { using type = rr::UInt; };
+template<typename T>
+struct Element
+{};
+template<>
+struct Element<Float>
+{
+ using type = rr::Float;
+};
+template<>
+struct Element<Int>
+{
+ using type = rr::Int;
+};
+template<>
+struct Element<UInt>
+{
+ using type = rr::UInt;
+};
-} // namespace SIMD
+} // namespace SIMD
Float4 exponential2(RValue<Float4> x, bool pp = false);
Float4 logarithm2(RValue<Float4> x, bool pp = false);
@@ -161,8 +175,8 @@
Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
Float4 modulo(RValue<Float4> x, RValue<Float4> y);
-Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
-Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
+Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
+Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
Float4 sine(RValue<Float4> x, bool pp = false);
Float4 cosine(RValue<Float4> x, bool pp = false);
Float4 tangent(RValue<Float4> x, bool pp = false);
@@ -199,10 +213,10 @@
rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints);
-template <typename T>
+template<typename T>
inline rr::RValue<T> AndAll(rr::RValue<T> const &mask);
-template <typename T>
+template<typename T>
inline rr::RValue<T> OrAll(rr::RValue<T> const &mask);
rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val);
@@ -224,9 +238,9 @@
// Performs a fused-multiply add, returning a * b + c.
rr::RValue<sw::SIMD::Float> FMA(
- rr::RValue<sw::SIMD::Float> const &a,
- rr::RValue<sw::SIMD::Float> const &b,
- rr::RValue<sw::SIMD::Float> const &c);
+ rr::RValue<sw::SIMD::Float> const &a,
+ rr::RValue<sw::SIMD::Float> const &b,
+ rr::RValue<sw::SIMD::Float> const &c);
// Returns the exponent of the floating point number f.
// Assumes IEEE 754
@@ -244,39 +258,39 @@
// Returns the determinant of a 2x2 matrix.
rr::RValue<sw::SIMD::Float> Determinant(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
- rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+ rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
// Returns the determinant of a 3x3 matrix.
rr::RValue<sw::SIMD::Float> Determinant(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
- rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
- rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+ rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+ rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
// Returns the determinant of a 4x4 matrix.
rr::RValue<sw::SIMD::Float> Determinant(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
- rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
- rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
- rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+ rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+ rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+ rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
// Returns the inverse of a 2x2 matrix.
std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
- rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
+ rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
// Returns the inverse of a 3x3 matrix.
std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
- rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
- rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
+ rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
+ rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
// Returns the inverse of a 4x4 matrix.
std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
- rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
- rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
- rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
- rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
+ rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
+ rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
+ rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
+ rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
////////////////////////////////////////////////////////////////////////////
// Inline functions
@@ -307,14 +321,14 @@
{
switch(robustness)
{
- case OutOfBoundsBehavior::Nullify:
- case OutOfBoundsBehavior::RobustBufferAccess:
- case OutOfBoundsBehavior::UndefinedValue:
- mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
- break;
- case OutOfBoundsBehavior::UndefinedBehavior:
- // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
- break;
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess:
+ case OutOfBoundsBehavior::UndefinedValue:
+ mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
+ break;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+ break;
}
}
@@ -339,14 +353,14 @@
bool zeroMaskedLanes = true;
switch(robustness)
{
- case OutOfBoundsBehavior::Nullify:
- case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
- zeroMaskedLanes = true;
- break;
- case OutOfBoundsBehavior::UndefinedValue:
- case OutOfBoundsBehavior::UndefinedBehavior:
- zeroMaskedLanes = false;
- break;
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
+ zeroMaskedLanes = true;
+ break;
+ case OutOfBoundsBehavior::UndefinedValue:
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ zeroMaskedLanes = false;
+ break;
}
if(hasStaticSequentialOffsets(sizeof(float)))
@@ -399,14 +413,14 @@
switch(robustness)
{
- case OutOfBoundsBehavior::Nullify:
- case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
- case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access.
- mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
- break;
- case OutOfBoundsBehavior::UndefinedBehavior:
- // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
- break;
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
+ case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access.
+ mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
+ break;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+ break;
}
if(!atomic && order == std::memory_order_relaxed)
@@ -420,9 +434,9 @@
auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx));
auto maskedVal = As<SIMD::Int>(val) & elect;
auto scalarVal = Extract(maskedVal, 0) |
- Extract(maskedVal, 1) |
- Extract(maskedVal, 2) |
- Extract(maskedVal, 3);
+ Extract(maskedVal, 1) |
+ Extract(maskedVal, 2) |
+ Extract(maskedVal, 3);
*rr::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal);
}
}
@@ -476,29 +490,30 @@
Store(T(val), robustness, mask, atomic, order);
}
-template <typename T>
+template<typename T>
inline rr::RValue<T> AndAll(rr::RValue<T> const &mask)
{
- T v1 = mask; // [x] [y] [z] [w]
- T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw]
- return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
+ T v1 = mask; // [x] [y] [z] [w]
+ T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw]
+ return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
}
-template <typename T>
+template<typename T>
inline rr::RValue<T> OrAll(rr::RValue<T> const &mask)
{
- T v1 = mask; // [x] [y] [z] [w]
- T v2 = v1.xzxz | v1.ywyw; // [xy] [zw] [xy] [zw]
- return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
+ T v1 = mask; // [x] [y] [z] [w]
+ T v2 = v1.xzxz | v1.ywyw; // [xy] [zw] [xy] [zw]
+ return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
}
-} // namespace sw
+} // namespace sw
#ifdef ENABLE_RR_PRINT
namespace rr {
-template <> struct PrintValue::Ty<sw::Vector4f>
+template<>
+struct PrintValue::Ty<sw::Vector4f>
{
- static std::string fmt(const sw::Vector4f& v)
+ static std::string fmt(const sw::Vector4f &v)
{
return "[x: " + PrintValue::fmt(v.x) +
", y: " + PrintValue::fmt(v.y) +
@@ -506,14 +521,15 @@
", w: " + PrintValue::fmt(v.w) + "]";
}
- static std::vector<rr::Value*> val(const sw::Vector4f& v)
+ static std::vector<rr::Value *> val(const sw::Vector4f &v)
{
return PrintValue::vals(v.x, v.y, v.z, v.w);
}
};
-template <> struct PrintValue::Ty<sw::Vector4s>
+template<>
+struct PrintValue::Ty<sw::Vector4s>
{
- static std::string fmt(const sw::Vector4s& v)
+ static std::string fmt(const sw::Vector4s &v)
{
return "[x: " + PrintValue::fmt(v.x) +
", y: " + PrintValue::fmt(v.y) +
@@ -521,13 +537,13 @@
", w: " + PrintValue::fmt(v.w) + "]";
}
- static std::vector<rr::Value*> val(const sw::Vector4s& v)
+ static std::vector<rr::Value *> val(const sw::Vector4s &v)
{
return PrintValue::vals(v.x, v.y, v.z, v.w);
}
};
-} // namespace sw
-#endif // ENABLE_RR_PRINT
+} // namespace rr
+#endif // ENABLE_RR_PRINT
-#endif // sw_ShaderCore_hpp
+#endif // sw_ShaderCore_hpp
diff --git a/src/Pipeline/SpirvID.hpp b/src/Pipeline/SpirvID.hpp
index 1f4624d..35b910a 100644
--- a/src/Pipeline/SpirvID.hpp
+++ b/src/Pipeline/SpirvID.hpp
@@ -15,8 +15,8 @@
#ifndef sw_ID_hpp
#define sw_ID_hpp
-#include <unordered_map>
#include <cstdint>
+#include <unordered_map>
namespace sw {
@@ -25,39 +25,43 @@
// ID; instead it is used to prevent implicit casts between identifiers of
// different T types.
// IDs are typically used as a map key to value of type T.
-template <typename T>
+template<typename T>
class SpirvID
{
public:
- SpirvID() : id(0) {}
- SpirvID(uint32_t id) : id(id) {}
- bool operator == (const SpirvID<T>& rhs) const { return id == rhs.id; }
- bool operator != (const SpirvID<T>& rhs) const { return id != rhs.id; }
- bool operator < (const SpirvID<T>& rhs) const { return id < rhs.id; }
+ SpirvID()
+ : id(0)
+ {}
+ SpirvID(uint32_t id)
+ : id(id)
+ {}
+ bool operator==(const SpirvID<T> &rhs) const { return id == rhs.id; }
+ bool operator!=(const SpirvID<T> &rhs) const { return id != rhs.id; }
+ bool operator<(const SpirvID<T> &rhs) const { return id < rhs.id; }
// value returns the numerical value of the identifier.
uint32_t value() const { return id; }
+
private:
uint32_t id;
};
// HandleMap<T> is an unordered map of SpirvID<T> to T.
-template <typename T>
+template<typename T>
using HandleMap = std::unordered_map<SpirvID<T>, T>;
-}
+} // namespace sw
-namespace std
-{
+namespace std {
// std::hash implementation for sw::SpirvID<T>
template<typename T>
-struct hash< sw::SpirvID<T> >
+struct hash<sw::SpirvID<T> >
{
- std::size_t operator()(const sw::SpirvID<T>& id) const noexcept
+ std::size_t operator()(const sw::SpirvID<T> &id) const noexcept
{
return std::hash<uint32_t>()(id.value());
}
};
-} // namespace sw
+} // namespace std
#endif // sw_ID_hpp
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 11b3770..2350195 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -23,17 +23,18 @@
namespace sw {
SpirvShader::SpirvShader(
- uint32_t codeSerialID,
- VkShaderStageFlagBits pipelineStage,
- const char *entryPointName,
- InsnStore const &insns,
- const vk::RenderPass *renderPass,
- uint32_t subpassIndex,
- bool robustBufferAccess)
- : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
- outputs{MAX_INTERFACE_COMPONENTS},
- codeSerialID(codeSerialID),
- robustBufferAccess(robustBufferAccess)
+ uint32_t codeSerialID,
+ VkShaderStageFlagBits pipelineStage,
+ const char *entryPointName,
+ InsnStore const &insns,
+ const vk::RenderPass *renderPass,
+ uint32_t subpassIndex,
+ bool robustBufferAccess)
+ : insns{ insns }
+ , inputs{ MAX_INTERFACE_COMPONENTS }
+ , outputs{ MAX_INTERFACE_COMPONENTS }
+ , codeSerialID(codeSerialID)
+ , robustBufferAccess(robustBufferAccess)
{
ASSERT(insns.size() > 0);
@@ -46,7 +47,8 @@
{
auto attachmentIndex = subpass.pInputAttachments[i].attachment;
inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
- ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED);
+ ? renderPass->getAttachment(attachmentIndex).format
+ : VK_FORMAT_UNDEFINED);
}
}
@@ -63,403 +65,404 @@
switch(opcode)
{
- case spv::OpEntryPoint:
- {
- executionModel = spv::ExecutionModel(insn.word(1));
- auto id = Function::ID(insn.word(2));
- auto name = insn.string(3);
- auto stage = executionModelToStage(executionModel);
- if(stage == pipelineStage && strcmp(name, entryPointName) == 0)
+ case spv::OpEntryPoint:
{
- ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
- entryPoint = id;
- }
- break;
- }
-
- case spv::OpExecutionMode:
- ProcessExecutionMode(insn);
- break;
-
- case spv::OpDecorate:
- {
- TypeOrObjectID targetId = insn.word(1);
- auto decoration = static_cast<spv::Decoration>(insn.word(2));
- uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
-
- decorations[targetId].Apply(decoration, value);
-
- switch(decoration)
- {
- case spv::DecorationDescriptorSet:
- descriptorDecorations[targetId].DescriptorSet = value;
- break;
- case spv::DecorationBinding:
- descriptorDecorations[targetId].Binding = value;
- break;
- case spv::DecorationInputAttachmentIndex:
- descriptorDecorations[targetId].InputAttachmentIndex = value;
- break;
- default:
- // Only handling descriptor decorations here.
+ executionModel = spv::ExecutionModel(insn.word(1));
+ auto id = Function::ID(insn.word(2));
+ auto name = insn.string(3);
+ auto stage = executionModelToStage(executionModel);
+ if(stage == pipelineStage && strcmp(name, entryPointName) == 0)
+ {
+ ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
+ entryPoint = id;
+ }
break;
}
- if(decoration == spv::DecorationCentroid)
- modes.NeedsCentroid = true;
- break;
- }
+ case spv::OpExecutionMode:
+ ProcessExecutionMode(insn);
+ break;
- case spv::OpMemberDecorate:
- {
- Type::ID targetId = insn.word(1);
- auto memberIndex = insn.word(2);
- auto decoration = static_cast<spv::Decoration>(insn.word(3));
- uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
-
- auto &d = memberDecorations[targetId];
- if(memberIndex >= d.size())
- d.resize(memberIndex + 1); // on demand; exact size would require another pass...
-
- d[memberIndex].Apply(decoration, value);
-
- if(decoration == spv::DecorationCentroid)
- modes.NeedsCentroid = true;
- break;
- }
-
- case spv::OpDecorationGroup:
- // Nothing to do here. We don't need to record the definition of the group; we'll just have
- // the bundle of decorations float around. If we were to ever walk the decorations directly,
- // we might think about introducing this as a real Object.
- break;
-
- case spv::OpGroupDecorate:
- {
- uint32_t group = insn.word(1);
- auto const &groupDecorations = decorations[group];
- auto const &descriptorGroupDecorations = descriptorDecorations[group];
- for(auto i = 2u; i < insn.wordCount(); i++)
+ case spv::OpDecorate:
{
- // Remaining operands are targets to apply the group to.
- uint32_t target = insn.word(i);
- decorations[target].Apply(groupDecorations);
- descriptorDecorations[target].Apply(descriptorGroupDecorations);
+ TypeOrObjectID targetId = insn.word(1);
+ auto decoration = static_cast<spv::Decoration>(insn.word(2));
+ uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;
+
+ decorations[targetId].Apply(decoration, value);
+
+ switch(decoration)
+ {
+ case spv::DecorationDescriptorSet:
+ descriptorDecorations[targetId].DescriptorSet = value;
+ break;
+ case spv::DecorationBinding:
+ descriptorDecorations[targetId].Binding = value;
+ break;
+ case spv::DecorationInputAttachmentIndex:
+ descriptorDecorations[targetId].InputAttachmentIndex = value;
+ break;
+ default:
+ // Only handling descriptor decorations here.
+ break;
+ }
+
+ if(decoration == spv::DecorationCentroid)
+ modes.NeedsCentroid = true;
+ break;
}
- break;
- }
-
- case spv::OpGroupMemberDecorate:
- {
- auto const &srcDecorations = decorations[insn.word(1)];
- for(auto i = 2u; i < insn.wordCount(); i += 2)
+ case spv::OpMemberDecorate:
{
- // remaining operands are pairs of <id>, literal for members to apply to.
- auto &d = memberDecorations[insn.word(i)];
- auto memberIndex = insn.word(i + 1);
+ Type::ID targetId = insn.word(1);
+ auto memberIndex = insn.word(2);
+ auto decoration = static_cast<spv::Decoration>(insn.word(3));
+ uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;
+
+ auto &d = memberDecorations[targetId];
if(memberIndex >= d.size())
- d.resize(memberIndex + 1); // on demand resize, see above...
- d[memberIndex].Apply(srcDecorations);
+ d.resize(memberIndex + 1); // on demand; exact size would require another pass...
+
+ d[memberIndex].Apply(decoration, value);
+
+ if(decoration == spv::DecorationCentroid)
+ modes.NeedsCentroid = true;
+ break;
}
- break;
- }
- case spv::OpLabel:
- {
- ASSERT(currentBlock.value() == 0);
- currentBlock = Block::ID(insn.word(1));
- blockStart = insn;
- break;
- }
-
- // Branch Instructions (subset of Termination Instructions):
- case spv::OpBranch:
- case spv::OpBranchConditional:
- case spv::OpSwitch:
- case spv::OpReturn:
- // fallthrough
-
- // Termination instruction:
- case spv::OpKill:
- case spv::OpUnreachable:
- {
- ASSERT(currentBlock.value() != 0);
- ASSERT(currentFunction.value() != 0);
-
- auto blockEnd = insn; blockEnd++;
- functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
- currentBlock = Block::ID(0);
-
- if(opcode == spv::OpKill)
- {
- modes.ContainsKill = true;
- }
- break;
- }
-
- case spv::OpLoopMerge:
- case spv::OpSelectionMerge:
- break; // Nothing to do in analysis pass.
-
- case spv::OpTypeVoid:
- case spv::OpTypeBool:
- case spv::OpTypeInt:
- case spv::OpTypeFloat:
- case spv::OpTypeVector:
- case spv::OpTypeMatrix:
- case spv::OpTypeImage:
- case spv::OpTypeSampler:
- case spv::OpTypeSampledImage:
- case spv::OpTypeArray:
- case spv::OpTypeRuntimeArray:
- case spv::OpTypeStruct:
- case spv::OpTypePointer:
- case spv::OpTypeFunction:
- DeclareType(insn);
- break;
-
- case spv::OpVariable:
- {
- Type::ID typeId = insn.word(1);
- Object::ID resultId = insn.word(2);
- auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
-
- auto &object = defs[resultId];
- object.kind = Object::Kind::Pointer;
- object.definition = insn;
- object.type = typeId;
-
- ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
- ASSERT(getType(typeId).storageClass == storageClass);
-
- switch(storageClass)
- {
- case spv::StorageClassInput:
- case spv::StorageClassOutput:
- ProcessInterfaceVariable(object);
+ case spv::OpDecorationGroup:
+ // Nothing to do here. We don't need to record the definition of the group; we'll just have
+ // the bundle of decorations float around. If we were to ever walk the decorations directly,
+ // we might think about introducing this as a real Object.
break;
- case spv::StorageClassUniform:
- case spv::StorageClassStorageBuffer:
- object.kind = Object::Kind::DescriptorSet;
+ case spv::OpGroupDecorate:
+ {
+ uint32_t group = insn.word(1);
+ auto const &groupDecorations = decorations[group];
+ auto const &descriptorGroupDecorations = descriptorDecorations[group];
+ for(auto i = 2u; i < insn.wordCount(); i++)
+ {
+ // Remaining operands are targets to apply the group to.
+ uint32_t target = insn.word(i);
+ decorations[target].Apply(groupDecorations);
+ descriptorDecorations[target].Apply(descriptorGroupDecorations);
+ }
+
+ break;
+ }
+
+ case spv::OpGroupMemberDecorate:
+ {
+ auto const &srcDecorations = decorations[insn.word(1)];
+ for(auto i = 2u; i < insn.wordCount(); i += 2)
+ {
+ // remaining operands are pairs of <id>, literal for members to apply to.
+ auto &d = memberDecorations[insn.word(i)];
+ auto memberIndex = insn.word(i + 1);
+ if(memberIndex >= d.size())
+ d.resize(memberIndex + 1); // on demand resize, see above...
+ d[memberIndex].Apply(srcDecorations);
+ }
+ break;
+ }
+
+ case spv::OpLabel:
+ {
+ ASSERT(currentBlock.value() == 0);
+ currentBlock = Block::ID(insn.word(1));
+ blockStart = insn;
+ break;
+ }
+
+ // Branch Instructions (subset of Termination Instructions):
+ case spv::OpBranch:
+ case spv::OpBranchConditional:
+ case spv::OpSwitch:
+ case spv::OpReturn:
+ // fallthrough
+
+ // Termination instruction:
+ case spv::OpKill:
+ case spv::OpUnreachable:
+ {
+ ASSERT(currentBlock.value() != 0);
+ ASSERT(currentFunction.value() != 0);
+
+ auto blockEnd = insn;
+ blockEnd++;
+ functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
+ currentBlock = Block::ID(0);
+
+ if(opcode == spv::OpKill)
+ {
+ modes.ContainsKill = true;
+ }
+ break;
+ }
+
+ case spv::OpLoopMerge:
+ case spv::OpSelectionMerge:
+ break; // Nothing to do in analysis pass.
+
+ case spv::OpTypeVoid:
+ case spv::OpTypeBool:
+ case spv::OpTypeInt:
+ case spv::OpTypeFloat:
+ case spv::OpTypeVector:
+ case spv::OpTypeMatrix:
+ case spv::OpTypeImage:
+ case spv::OpTypeSampler:
+ case spv::OpTypeSampledImage:
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray:
+ case spv::OpTypeStruct:
+ case spv::OpTypePointer:
+ case spv::OpTypeFunction:
+ DeclareType(insn);
break;
- case spv::StorageClassPushConstant:
- case spv::StorageClassPrivate:
- case spv::StorageClassFunction:
- case spv::StorageClassUniformConstant:
- break; // Correctly handled.
-
- case spv::StorageClassWorkgroup:
+ case spv::OpVariable:
{
- auto &elTy = getType(getType(typeId).element);
- auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float));
- workgroupMemory.allocate(resultId, sizeInBytes);
+ Type::ID typeId = insn.word(1);
+ Object::ID resultId = insn.word(2);
+ auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
+
+ auto &object = defs[resultId];
object.kind = Object::Kind::Pointer;
- break;
- }
- case spv::StorageClassAtomicCounter:
- case spv::StorageClassImage:
- UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
- break;
+ object.definition = insn;
+ object.type = typeId;
- case spv::StorageClassCrossWorkgroup:
- UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
- break;
+ ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
+ ASSERT(getType(typeId).storageClass == storageClass);
- case spv::StorageClassGeneric:
- UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
- break;
-
- default:
- UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
- break;
- }
- break;
- }
-
- case spv::OpConstant:
- case spv::OpSpecConstant:
- CreateConstant(insn).constantValue[0] = insn.word(3);
- break;
- case spv::OpConstantFalse:
- case spv::OpSpecConstantFalse:
- CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero.
- break;
- case spv::OpConstantTrue:
- case spv::OpSpecConstantTrue:
- CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set.
- break;
- case spv::OpConstantNull:
- case spv::OpUndef:
- {
- // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
- // OpConstantNull forms a constant of arbitrary type, all zeros.
- auto &object = CreateConstant(insn);
- auto &objectTy = getType(object.type);
- for(auto i = 0u; i < objectTy.sizeInComponents; i++)
- {
- object.constantValue[i] = 0;
- }
- break;
- }
- case spv::OpConstantComposite:
- case spv::OpSpecConstantComposite:
- {
- auto &object = CreateConstant(insn);
- auto offset = 0u;
- for(auto i = 0u; i < insn.wordCount() - 3; i++)
- {
- auto &constituent = getObject(insn.word(i + 3));
- auto &constituentTy = getType(constituent.type);
- for(auto j = 0u; j < constituentTy.sizeInComponents; j++)
+ switch(storageClass)
{
- object.constantValue[offset++] = constituent.constantValue[j];
+ case spv::StorageClassInput:
+ case spv::StorageClassOutput:
+ ProcessInterfaceVariable(object);
+ break;
+
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ object.kind = Object::Kind::DescriptorSet;
+ break;
+
+ case spv::StorageClassPushConstant:
+ case spv::StorageClassPrivate:
+ case spv::StorageClassFunction:
+ case spv::StorageClassUniformConstant:
+ break; // Correctly handled.
+
+ case spv::StorageClassWorkgroup:
+ {
+ auto &elTy = getType(getType(typeId).element);
+ auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float));
+ workgroupMemory.allocate(resultId, sizeInBytes);
+ object.kind = Object::Kind::Pointer;
+ break;
+ }
+ case spv::StorageClassAtomicCounter:
+ case spv::StorageClassImage:
+ UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
+ break;
+
+ case spv::StorageClassCrossWorkgroup:
+ UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
+ break;
+
+ case spv::StorageClassGeneric:
+ UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
+ break;
+
+ default:
+ UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
+ break;
}
+ break;
}
- auto objectId = Object::ID(insn.word(2));
- auto decorationsIt = decorations.find(objectId);
- if(decorationsIt != decorations.end() &&
- decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
+ case spv::OpConstant:
+ case spv::OpSpecConstant:
+ CreateConstant(insn).constantValue[0] = insn.word(3);
+ break;
+ case spv::OpConstantFalse:
+ case spv::OpSpecConstantFalse:
+ CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero.
+ break;
+ case spv::OpConstantTrue:
+ case spv::OpSpecConstantTrue:
+ CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set.
+ break;
+ case spv::OpConstantNull:
+ case spv::OpUndef:
{
- // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
- // Decorating an object with the WorkgroupSize built-in
- // decoration will make that object contain the dimensions
- // of a local workgroup. If an object is decorated with the
- // WorkgroupSize decoration, this must take precedence over
- // any execution mode set for LocalSize.
- // The object decorated with WorkgroupSize must be declared
- // as a three-component vector of 32-bit integers.
- ASSERT(getType(object.type).sizeInComponents == 3);
- modes.WorkgroupSizeX = object.constantValue[0];
- modes.WorkgroupSizeY = object.constantValue[1];
- modes.WorkgroupSizeZ = object.constantValue[2];
- }
- break;
- }
- case spv::OpSpecConstantOp:
- EvalSpecConstantOp(insn);
- break;
-
- case spv::OpCapability:
- {
- auto capability = static_cast<spv::Capability>(insn.word(1));
- switch(capability)
- {
- case spv::CapabilityMatrix: capabilities.Matrix = true; break;
- case spv::CapabilityShader: capabilities.Shader = true; break;
- case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break;
- case spv::CapabilityCullDistance: capabilities.CullDistance = true; break;
- case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
- case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
- case spv::CapabilityImage1D: capabilities.Image1D = true; break;
- case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break;
- case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
- case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break;
- case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
- case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
- case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
- case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
- case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
- case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
- case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break;
- case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
- case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
- case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
- case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
- case spv::CapabilityMultiView: capabilities.MultiView = true; break;
- default:
- UNSUPPORTED("Unsupported capability %u", insn.word(1));
- }
- break; // Various capabilities will be declared, but none affect our code generation at this point.
- }
-
- case spv::OpMemoryModel:
- break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
-
- case spv::OpFunction:
- {
- auto functionId = Function::ID(insn.word(2));
- ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
- currentFunction = functionId;
- auto &function = functions[functionId];
- function.result = Type::ID(insn.word(1));
- function.type = Type::ID(insn.word(4));
- // Scan forward to find the function's label.
- for(auto it = insn; it != end() && function.entry == 0; it++)
- {
- switch(it.opcode())
+ // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
+ // OpConstantNull forms a constant of arbitrary type, all zeros.
+ auto &object = CreateConstant(insn);
+ auto &objectTy = getType(object.type);
+ for(auto i = 0u; i < objectTy.sizeInComponents; i++)
{
- case spv::OpFunction:
- case spv::OpFunctionParameter:
- break;
- case spv::OpLabel:
- function.entry = Block::ID(it.word(1));
- break;
- default:
- WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
+ object.constantValue[i] = 0;
}
+ break;
}
- ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
- break;
- }
-
- case spv::OpFunctionEnd:
- currentFunction = 0;
- break;
-
- case spv::OpExtInstImport:
- {
- // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
- // Valid shaders will not attempt to import any other instruction sets.
- auto ext = insn.string(2);
- if(0 != strcmp("GLSL.std.450", ext))
+ case spv::OpConstantComposite:
+ case spv::OpSpecConstantComposite:
{
- UNSUPPORTED("SPIR-V Extension: %s", ext);
+ auto &object = CreateConstant(insn);
+ auto offset = 0u;
+ for(auto i = 0u; i < insn.wordCount() - 3; i++)
+ {
+ auto &constituent = getObject(insn.word(i + 3));
+ auto &constituentTy = getType(constituent.type);
+ for(auto j = 0u; j < constituentTy.sizeInComponents; j++)
+ {
+ object.constantValue[offset++] = constituent.constantValue[j];
+ }
+ }
+
+ auto objectId = Object::ID(insn.word(2));
+ auto decorationsIt = decorations.find(objectId);
+ if(decorationsIt != decorations.end() &&
+ decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
+ {
+ // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
+ // Decorating an object with the WorkgroupSize built-in
+ // decoration will make that object contain the dimensions
+ // of a local workgroup. If an object is decorated with the
+ // WorkgroupSize decoration, this must take precedence over
+ // any execution mode set for LocalSize.
+ // The object decorated with WorkgroupSize must be declared
+ // as a three-component vector of 32-bit integers.
+ ASSERT(getType(object.type).sizeInComponents == 3);
+ modes.WorkgroupSizeX = object.constantValue[0];
+ modes.WorkgroupSizeY = object.constantValue[1];
+ modes.WorkgroupSizeZ = object.constantValue[2];
+ }
+ break;
}
- break;
- }
- case spv::OpName:
- case spv::OpMemberName:
- case spv::OpSource:
- case spv::OpSourceContinued:
- case spv::OpSourceExtension:
- case spv::OpLine:
- case spv::OpNoLine:
- case spv::OpModuleProcessed:
- case spv::OpString:
- // No semantic impact
- break;
+ case spv::OpSpecConstantOp:
+ EvalSpecConstantOp(insn);
+ break;
- case spv::OpFunctionParameter:
- // These should have all been removed by preprocessing passes. If we see them here,
- // our assumptions are wrong and we will probably generate wrong code.
- UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
- break;
+ case spv::OpCapability:
+ {
+ auto capability = static_cast<spv::Capability>(insn.word(1));
+ switch(capability)
+ {
+ case spv::CapabilityMatrix: capabilities.Matrix = true; break;
+ case spv::CapabilityShader: capabilities.Shader = true; break;
+ case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break;
+ case spv::CapabilityCullDistance: capabilities.CullDistance = true; break;
+ case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
+ case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
+ case spv::CapabilityImage1D: capabilities.Image1D = true; break;
+ case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break;
+ case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
+ case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break;
+ case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
+ case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
+ case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
+ case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
+ case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
+ case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
+ case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break;
+ case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
+ case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
+ case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
+ case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
+ case spv::CapabilityMultiView: capabilities.MultiView = true; break;
+ default:
+ UNSUPPORTED("Unsupported capability %u", insn.word(1));
+ }
+ break; // Various capabilities will be declared, but none affect our code generation at this point.
+ }
- case spv::OpFunctionCall:
- // TODO(b/141246700): Add full support for spv::OpFunctionCall
- break;
+ case spv::OpMemoryModel:
+ break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
- case spv::OpFConvert:
- UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
- break;
+ case spv::OpFunction:
+ {
+ auto functionId = Function::ID(insn.word(2));
+ ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
+ currentFunction = functionId;
+ auto &function = functions[functionId];
+ function.result = Type::ID(insn.word(1));
+ function.type = Type::ID(insn.word(4));
+ // Scan forward to find the function's label.
+ for(auto it = insn; it != end() && function.entry == 0; it++)
+ {
+ switch(it.opcode())
+ {
+ case spv::OpFunction:
+ case spv::OpFunctionParameter:
+ break;
+ case spv::OpLabel:
+ function.entry = Block::ID(it.word(1));
+ break;
+ default:
+ WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
+ }
+ }
+ ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
+ break;
+ }
- case spv::OpSConvert:
- UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
- break;
+ case spv::OpFunctionEnd:
+ currentFunction = 0;
+ break;
- case spv::OpUConvert:
- UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
- break;
+ case spv::OpExtInstImport:
+ {
+ // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
+ // Valid shaders will not attempt to import any other instruction sets.
+ auto ext = insn.string(2);
+ if(0 != strcmp("GLSL.std.450", ext))
+ {
+ UNSUPPORTED("SPIR-V Extension: %s", ext);
+ }
+ break;
+ }
+ case spv::OpName:
+ case spv::OpMemberName:
+ case spv::OpSource:
+ case spv::OpSourceContinued:
+ case spv::OpSourceExtension:
+ case spv::OpLine:
+ case spv::OpNoLine:
+ case spv::OpModuleProcessed:
+ case spv::OpString:
+ // No semantic impact
+ break;
- case spv::OpLoad:
- case spv::OpAccessChain:
- case spv::OpInBoundsAccessChain:
- case spv::OpSampledImage:
- case spv::OpImage:
+ case spv::OpFunctionParameter:
+ // These should have all been removed by preprocessing passes. If we see them here,
+ // our assumptions are wrong and we will probably generate wrong code.
+ UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
+ break;
+
+ case spv::OpFunctionCall:
+ // TODO(b/141246700): Add full support for spv::OpFunctionCall
+ break;
+
+ case spv::OpFConvert:
+ UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
+ break;
+
+ case spv::OpSConvert:
+ UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
+ break;
+
+ case spv::OpUConvert:
+ UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
+ break;
+
+ case spv::OpLoad:
+ case spv::OpAccessChain:
+ case spv::OpInBoundsAccessChain:
+ case spv::OpSampledImage:
+ case spv::OpImage:
{
// Propagate the descriptor decorations to the result.
Object::ID resultId = insn.word(2);
@@ -484,202 +487,202 @@
}
break;
- case spv::OpCompositeConstruct:
- case spv::OpCompositeInsert:
- case spv::OpCompositeExtract:
- case spv::OpVectorShuffle:
- case spv::OpVectorTimesScalar:
- case spv::OpMatrixTimesScalar:
- case spv::OpMatrixTimesVector:
- case spv::OpVectorTimesMatrix:
- case spv::OpMatrixTimesMatrix:
- case spv::OpOuterProduct:
- case spv::OpTranspose:
- case spv::OpVectorExtractDynamic:
- case spv::OpVectorInsertDynamic:
- // Unary ops
- case spv::OpNot:
- case spv::OpBitFieldInsert:
- case spv::OpBitFieldSExtract:
- case spv::OpBitFieldUExtract:
- case spv::OpBitReverse:
- case spv::OpBitCount:
- case spv::OpSNegate:
- case spv::OpFNegate:
- case spv::OpLogicalNot:
- case spv::OpQuantizeToF16:
- // Binary ops
- case spv::OpIAdd:
- case spv::OpISub:
- case spv::OpIMul:
- case spv::OpSDiv:
- case spv::OpUDiv:
- case spv::OpFAdd:
- case spv::OpFSub:
- case spv::OpFMul:
- case spv::OpFDiv:
- case spv::OpFMod:
- case spv::OpFRem:
- case spv::OpFOrdEqual:
- case spv::OpFUnordEqual:
- case spv::OpFOrdNotEqual:
- case spv::OpFUnordNotEqual:
- case spv::OpFOrdLessThan:
- case spv::OpFUnordLessThan:
- case spv::OpFOrdGreaterThan:
- case spv::OpFUnordGreaterThan:
- case spv::OpFOrdLessThanEqual:
- case spv::OpFUnordLessThanEqual:
- case spv::OpFOrdGreaterThanEqual:
- case spv::OpFUnordGreaterThanEqual:
- case spv::OpSMod:
- case spv::OpSRem:
- case spv::OpUMod:
- case spv::OpIEqual:
- case spv::OpINotEqual:
- case spv::OpUGreaterThan:
- case spv::OpSGreaterThan:
- case spv::OpUGreaterThanEqual:
- case spv::OpSGreaterThanEqual:
- case spv::OpULessThan:
- case spv::OpSLessThan:
- case spv::OpULessThanEqual:
- case spv::OpSLessThanEqual:
- case spv::OpShiftRightLogical:
- case spv::OpShiftRightArithmetic:
- case spv::OpShiftLeftLogical:
- case spv::OpBitwiseOr:
- case spv::OpBitwiseXor:
- case spv::OpBitwiseAnd:
- case spv::OpLogicalOr:
- case spv::OpLogicalAnd:
- case spv::OpLogicalEqual:
- case spv::OpLogicalNotEqual:
- case spv::OpUMulExtended:
- case spv::OpSMulExtended:
- case spv::OpIAddCarry:
- case spv::OpISubBorrow:
- case spv::OpDot:
- case spv::OpConvertFToU:
- case spv::OpConvertFToS:
- case spv::OpConvertSToF:
- case spv::OpConvertUToF:
- case spv::OpBitcast:
- case spv::OpSelect:
- case spv::OpExtInst:
- case spv::OpIsInf:
- case spv::OpIsNan:
- case spv::OpAny:
- case spv::OpAll:
- case spv::OpDPdx:
- case spv::OpDPdxCoarse:
- case spv::OpDPdy:
- case spv::OpDPdyCoarse:
- case spv::OpFwidth:
- case spv::OpFwidthCoarse:
- case spv::OpDPdxFine:
- case spv::OpDPdyFine:
- case spv::OpFwidthFine:
- case spv::OpAtomicLoad:
- case spv::OpAtomicIAdd:
- case spv::OpAtomicISub:
- case spv::OpAtomicSMin:
- case spv::OpAtomicSMax:
- case spv::OpAtomicUMin:
- case spv::OpAtomicUMax:
- case spv::OpAtomicAnd:
- case spv::OpAtomicOr:
- case spv::OpAtomicXor:
- case spv::OpAtomicIIncrement:
- case spv::OpAtomicIDecrement:
- case spv::OpAtomicExchange:
- case spv::OpAtomicCompareExchange:
- case spv::OpPhi:
- case spv::OpImageSampleImplicitLod:
- case spv::OpImageSampleExplicitLod:
- case spv::OpImageSampleDrefImplicitLod:
- case spv::OpImageSampleDrefExplicitLod:
- case spv::OpImageSampleProjImplicitLod:
- case spv::OpImageSampleProjExplicitLod:
- case spv::OpImageSampleProjDrefImplicitLod:
- case spv::OpImageSampleProjDrefExplicitLod:
- case spv::OpImageGather:
- case spv::OpImageDrefGather:
- case spv::OpImageFetch:
- case spv::OpImageQuerySizeLod:
- case spv::OpImageQuerySize:
- case spv::OpImageQueryLod:
- case spv::OpImageQueryLevels:
- case spv::OpImageQuerySamples:
- case spv::OpImageRead:
- case spv::OpImageTexelPointer:
- case spv::OpGroupNonUniformElect:
- case spv::OpGroupNonUniformAll:
- case spv::OpGroupNonUniformAny:
- case spv::OpGroupNonUniformAllEqual:
- case spv::OpGroupNonUniformBroadcast:
- case spv::OpGroupNonUniformBroadcastFirst:
- case spv::OpGroupNonUniformBallot:
- case spv::OpGroupNonUniformInverseBallot:
- case spv::OpGroupNonUniformBallotBitExtract:
- case spv::OpGroupNonUniformBallotBitCount:
- case spv::OpGroupNonUniformBallotFindLSB:
- case spv::OpGroupNonUniformBallotFindMSB:
- case spv::OpGroupNonUniformShuffle:
- case spv::OpGroupNonUniformShuffleXor:
- case spv::OpGroupNonUniformShuffleUp:
- case spv::OpGroupNonUniformShuffleDown:
- case spv::OpGroupNonUniformIAdd:
- case spv::OpGroupNonUniformFAdd:
- case spv::OpGroupNonUniformIMul:
- case spv::OpGroupNonUniformFMul:
- case spv::OpGroupNonUniformSMin:
- case spv::OpGroupNonUniformUMin:
- case spv::OpGroupNonUniformFMin:
- case spv::OpGroupNonUniformSMax:
- case spv::OpGroupNonUniformUMax:
- case spv::OpGroupNonUniformFMax:
- case spv::OpGroupNonUniformBitwiseAnd:
- case spv::OpGroupNonUniformBitwiseOr:
- case spv::OpGroupNonUniformBitwiseXor:
- case spv::OpGroupNonUniformLogicalAnd:
- case spv::OpGroupNonUniformLogicalOr:
- case spv::OpGroupNonUniformLogicalXor:
- case spv::OpCopyObject:
- case spv::OpArrayLength:
- // Instructions that yield an intermediate value or divergent pointer
- DefineResult(insn);
- break;
+ case spv::OpCompositeConstruct:
+ case spv::OpCompositeInsert:
+ case spv::OpCompositeExtract:
+ case spv::OpVectorShuffle:
+ case spv::OpVectorTimesScalar:
+ case spv::OpMatrixTimesScalar:
+ case spv::OpMatrixTimesVector:
+ case spv::OpVectorTimesMatrix:
+ case spv::OpMatrixTimesMatrix:
+ case spv::OpOuterProduct:
+ case spv::OpTranspose:
+ case spv::OpVectorExtractDynamic:
+ case spv::OpVectorInsertDynamic:
+ // Unary ops
+ case spv::OpNot:
+ case spv::OpBitFieldInsert:
+ case spv::OpBitFieldSExtract:
+ case spv::OpBitFieldUExtract:
+ case spv::OpBitReverse:
+ case spv::OpBitCount:
+ case spv::OpSNegate:
+ case spv::OpFNegate:
+ case spv::OpLogicalNot:
+ case spv::OpQuantizeToF16:
+ // Binary ops
+ case spv::OpIAdd:
+ case spv::OpISub:
+ case spv::OpIMul:
+ case spv::OpSDiv:
+ case spv::OpUDiv:
+ case spv::OpFAdd:
+ case spv::OpFSub:
+ case spv::OpFMul:
+ case spv::OpFDiv:
+ case spv::OpFMod:
+ case spv::OpFRem:
+ case spv::OpFOrdEqual:
+ case spv::OpFUnordEqual:
+ case spv::OpFOrdNotEqual:
+ case spv::OpFUnordNotEqual:
+ case spv::OpFOrdLessThan:
+ case spv::OpFUnordLessThan:
+ case spv::OpFOrdGreaterThan:
+ case spv::OpFUnordGreaterThan:
+ case spv::OpFOrdLessThanEqual:
+ case spv::OpFUnordLessThanEqual:
+ case spv::OpFOrdGreaterThanEqual:
+ case spv::OpFUnordGreaterThanEqual:
+ case spv::OpSMod:
+ case spv::OpSRem:
+ case spv::OpUMod:
+ case spv::OpIEqual:
+ case spv::OpINotEqual:
+ case spv::OpUGreaterThan:
+ case spv::OpSGreaterThan:
+ case spv::OpUGreaterThanEqual:
+ case spv::OpSGreaterThanEqual:
+ case spv::OpULessThan:
+ case spv::OpSLessThan:
+ case spv::OpULessThanEqual:
+ case spv::OpSLessThanEqual:
+ case spv::OpShiftRightLogical:
+ case spv::OpShiftRightArithmetic:
+ case spv::OpShiftLeftLogical:
+ case spv::OpBitwiseOr:
+ case spv::OpBitwiseXor:
+ case spv::OpBitwiseAnd:
+ case spv::OpLogicalOr:
+ case spv::OpLogicalAnd:
+ case spv::OpLogicalEqual:
+ case spv::OpLogicalNotEqual:
+ case spv::OpUMulExtended:
+ case spv::OpSMulExtended:
+ case spv::OpIAddCarry:
+ case spv::OpISubBorrow:
+ case spv::OpDot:
+ case spv::OpConvertFToU:
+ case spv::OpConvertFToS:
+ case spv::OpConvertSToF:
+ case spv::OpConvertUToF:
+ case spv::OpBitcast:
+ case spv::OpSelect:
+ case spv::OpExtInst:
+ case spv::OpIsInf:
+ case spv::OpIsNan:
+ case spv::OpAny:
+ case spv::OpAll:
+ case spv::OpDPdx:
+ case spv::OpDPdxCoarse:
+ case spv::OpDPdy:
+ case spv::OpDPdyCoarse:
+ case spv::OpFwidth:
+ case spv::OpFwidthCoarse:
+ case spv::OpDPdxFine:
+ case spv::OpDPdyFine:
+ case spv::OpFwidthFine:
+ case spv::OpAtomicLoad:
+ case spv::OpAtomicIAdd:
+ case spv::OpAtomicISub:
+ case spv::OpAtomicSMin:
+ case spv::OpAtomicSMax:
+ case spv::OpAtomicUMin:
+ case spv::OpAtomicUMax:
+ case spv::OpAtomicAnd:
+ case spv::OpAtomicOr:
+ case spv::OpAtomicXor:
+ case spv::OpAtomicIIncrement:
+ case spv::OpAtomicIDecrement:
+ case spv::OpAtomicExchange:
+ case spv::OpAtomicCompareExchange:
+ case spv::OpPhi:
+ case spv::OpImageSampleImplicitLod:
+ case spv::OpImageSampleExplicitLod:
+ case spv::OpImageSampleDrefImplicitLod:
+ case spv::OpImageSampleDrefExplicitLod:
+ case spv::OpImageSampleProjImplicitLod:
+ case spv::OpImageSampleProjExplicitLod:
+ case spv::OpImageSampleProjDrefImplicitLod:
+ case spv::OpImageSampleProjDrefExplicitLod:
+ case spv::OpImageGather:
+ case spv::OpImageDrefGather:
+ case spv::OpImageFetch:
+ case spv::OpImageQuerySizeLod:
+ case spv::OpImageQuerySize:
+ case spv::OpImageQueryLod:
+ case spv::OpImageQueryLevels:
+ case spv::OpImageQuerySamples:
+ case spv::OpImageRead:
+ case spv::OpImageTexelPointer:
+ case spv::OpGroupNonUniformElect:
+ case spv::OpGroupNonUniformAll:
+ case spv::OpGroupNonUniformAny:
+ case spv::OpGroupNonUniformAllEqual:
+ case spv::OpGroupNonUniformBroadcast:
+ case spv::OpGroupNonUniformBroadcastFirst:
+ case spv::OpGroupNonUniformBallot:
+ case spv::OpGroupNonUniformInverseBallot:
+ case spv::OpGroupNonUniformBallotBitExtract:
+ case spv::OpGroupNonUniformBallotBitCount:
+ case spv::OpGroupNonUniformBallotFindLSB:
+ case spv::OpGroupNonUniformBallotFindMSB:
+ case spv::OpGroupNonUniformShuffle:
+ case spv::OpGroupNonUniformShuffleXor:
+ case spv::OpGroupNonUniformShuffleUp:
+ case spv::OpGroupNonUniformShuffleDown:
+ case spv::OpGroupNonUniformIAdd:
+ case spv::OpGroupNonUniformFAdd:
+ case spv::OpGroupNonUniformIMul:
+ case spv::OpGroupNonUniformFMul:
+ case spv::OpGroupNonUniformSMin:
+ case spv::OpGroupNonUniformUMin:
+ case spv::OpGroupNonUniformFMin:
+ case spv::OpGroupNonUniformSMax:
+ case spv::OpGroupNonUniformUMax:
+ case spv::OpGroupNonUniformFMax:
+ case spv::OpGroupNonUniformBitwiseAnd:
+ case spv::OpGroupNonUniformBitwiseOr:
+ case spv::OpGroupNonUniformBitwiseXor:
+ case spv::OpGroupNonUniformLogicalAnd:
+ case spv::OpGroupNonUniformLogicalOr:
+ case spv::OpGroupNonUniformLogicalXor:
+ case spv::OpCopyObject:
+ case spv::OpArrayLength:
+ // Instructions that yield an intermediate value or divergent pointer
+ DefineResult(insn);
+ break;
- case spv::OpStore:
- case spv::OpAtomicStore:
- case spv::OpImageWrite:
- case spv::OpCopyMemory:
- case spv::OpMemoryBarrier:
- // Don't need to do anything during analysis pass
- break;
+ case spv::OpStore:
+ case spv::OpAtomicStore:
+ case spv::OpImageWrite:
+ case spv::OpCopyMemory:
+ case spv::OpMemoryBarrier:
+ // Don't need to do anything during analysis pass
+ break;
- case spv::OpControlBarrier:
- modes.ContainsControlBarriers = true;
- break;
+ case spv::OpControlBarrier:
+ modes.ContainsControlBarriers = true;
+ break;
- case spv::OpExtension:
- {
- auto ext = insn.string(1);
- // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
- // extension per Appendix A, `Vulkan Environment for SPIR-V`.
- if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
- if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
- if(!strcmp(ext, "SPV_KHR_16bit_storage")) break;
- if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
- if(!strcmp(ext, "SPV_KHR_device_group")) break;
- if(!strcmp(ext, "SPV_KHR_multiview")) break;
- UNSUPPORTED("SPIR-V Extension: %s", ext);
- break;
- }
+ case spv::OpExtension:
+ {
+ auto ext = insn.string(1);
+ // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
+ // extension per Appendix A, `Vulkan Environment for SPIR-V`.
+ if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
+ if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
+ if(!strcmp(ext, "SPV_KHR_16bit_storage")) break;
+ if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
+ if(!strcmp(ext, "SPV_KHR_device_group")) break;
+ if(!strcmp(ext, "SPV_KHR_multiview")) break;
+ UNSUPPORTED("SPIR-V Extension: %s", ext);
+ break;
+ }
- default:
- UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
+ default:
+ UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
}
}
@@ -702,45 +705,45 @@
// member. All members of such a structure are builtins.
switch(insn.opcode())
{
- case spv::OpTypeStruct:
- {
- auto d = memberDecorations.find(resultId);
- if(d != memberDecorations.end())
+ case spv::OpTypeStruct:
{
- for(auto &m : d->second)
+ auto d = memberDecorations.find(resultId);
+ if(d != memberDecorations.end())
{
- if(m.HasBuiltIn)
+ for(auto &m : d->second)
{
- type.isBuiltInBlock = true;
- break;
+ if(m.HasBuiltIn)
+ {
+ type.isBuiltInBlock = true;
+ break;
+ }
}
}
+ break;
}
- break;
- }
- case spv::OpTypePointer:
- {
- Type::ID elementTypeId = insn.word(3);
- type.element = elementTypeId;
- type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
- type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
- break;
- }
- case spv::OpTypeVector:
- case spv::OpTypeMatrix:
- case spv::OpTypeArray:
- case spv::OpTypeRuntimeArray:
- {
- Type::ID elementTypeId = insn.word(2);
- type.element = elementTypeId;
- break;
- }
- default:
- break;
+ case spv::OpTypePointer:
+ {
+ Type::ID elementTypeId = insn.word(3);
+ type.element = elementTypeId;
+ type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
+ type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
+ break;
+ }
+ case spv::OpTypeVector:
+ case spv::OpTypeMatrix:
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray:
+ {
+ Type::ID elementTypeId = insn.word(2);
+ type.element = elementTypeId;
+ break;
+ }
+ default:
+ break;
}
}
-SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
+SpirvShader::Object &SpirvShader::CreateConstant(InsnIterator insn)
{
Type::ID typeId = insn.word(1);
Object::ID resultId = insn.word(2);
@@ -771,7 +774,7 @@
{
// walk the builtin block, registering each of its members separately.
auto m = memberDecorations.find(objectTy.element);
- ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
+ ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
auto &structType = pointeeTy.definition;
auto offset = 0u;
auto word = 2u;
@@ -781,7 +784,7 @@
if(member.HasBuiltIn)
{
- builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
+ builtinInterface[member.BuiltIn] = { resultId, offset, memberType.sizeInComponents };
}
offset += memberType.sizeInComponents;
@@ -793,24 +796,24 @@
auto d = decorations.find(resultId);
if(d != decorations.end() && d->second.HasBuiltIn)
{
- builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
+ builtinInterface[d->second.BuiltIn] = { resultId, 0, pointeeTy.sizeInComponents };
}
else
{
object.kind = Object::Kind::InterfaceVariable;
VisitInterface(resultId,
- [&userDefinedInterface](Decorations const &d, AttribType type) {
- // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
- auto scalarSlot = (d.Location << 2) | d.Component;
- ASSERT(scalarSlot >= 0 &&
- scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
+ [&userDefinedInterface](Decorations const &d, AttribType type) {
+ // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
+ auto scalarSlot = (d.Location << 2) | d.Component;
+ ASSERT(scalarSlot >= 0 &&
+ scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
- auto &slot = userDefinedInterface[scalarSlot];
- slot.Type = type;
- slot.Flat = d.Flat;
- slot.NoPerspective = d.NoPerspective;
- slot.Centroid = d.Centroid;
- });
+ auto &slot = userDefinedInterface[scalarSlot];
+ slot.Type = type;
+ slot.Flat = d.Flat;
+ slot.NoPerspective = d.NoPerspective;
+ slot.Centroid = d.Centroid;
+ });
}
}
@@ -819,31 +822,31 @@
auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
switch(mode)
{
- case spv::ExecutionModeEarlyFragmentTests:
- modes.EarlyFragmentTests = true;
- break;
- case spv::ExecutionModeDepthReplacing:
- modes.DepthReplacing = true;
- break;
- case spv::ExecutionModeDepthGreater:
- modes.DepthGreater = true;
- break;
- case spv::ExecutionModeDepthLess:
- modes.DepthLess = true;
- break;
- case spv::ExecutionModeDepthUnchanged:
- modes.DepthUnchanged = true;
- break;
- case spv::ExecutionModeLocalSize:
- modes.WorkgroupSizeX = insn.word(3);
- modes.WorkgroupSizeY = insn.word(4);
- modes.WorkgroupSizeZ = insn.word(5);
- break;
- case spv::ExecutionModeOriginUpperLeft:
- // This is always the case for a Vulkan shader. Do nothing.
- break;
- default:
- UNREACHABLE("Execution mode: %d", int(mode));
+ case spv::ExecutionModeEarlyFragmentTests:
+ modes.EarlyFragmentTests = true;
+ break;
+ case spv::ExecutionModeDepthReplacing:
+ modes.DepthReplacing = true;
+ break;
+ case spv::ExecutionModeDepthGreater:
+ modes.DepthGreater = true;
+ break;
+ case spv::ExecutionModeDepthLess:
+ modes.DepthLess = true;
+ break;
+ case spv::ExecutionModeDepthUnchanged:
+ modes.DepthUnchanged = true;
+ break;
+ case spv::ExecutionModeLocalSize:
+ modes.WorkgroupSizeX = insn.word(3);
+ modes.WorkgroupSizeY = insn.word(4);
+ modes.WorkgroupSizeZ = insn.word(5);
+ break;
+ case spv::ExecutionModeOriginUpperLeft:
+ // This is always the case for a Vulkan shader. Do nothing.
+ break;
+ default:
+ UNREACHABLE("Execution mode: %d", int(mode));
}
}
@@ -854,54 +857,54 @@
// already been described (and so their sizes determined)
switch(insn.opcode())
{
- case spv::OpTypeVoid:
- case spv::OpTypeSampler:
- case spv::OpTypeImage:
- case spv::OpTypeSampledImage:
- case spv::OpTypeFunction:
- case spv::OpTypeRuntimeArray:
- // Objects that don't consume any space.
- // Descriptor-backed objects currently only need exist at compile-time.
- // Runtime arrays don't appear in places where their size would be interesting
- return 0;
+ case spv::OpTypeVoid:
+ case spv::OpTypeSampler:
+ case spv::OpTypeImage:
+ case spv::OpTypeSampledImage:
+ case spv::OpTypeFunction:
+ case spv::OpTypeRuntimeArray:
+ // Objects that don't consume any space.
+ // Descriptor-backed objects currently only need exist at compile-time.
+ // Runtime arrays don't appear in places where their size would be interesting
+ return 0;
- case spv::OpTypeBool:
- case spv::OpTypeFloat:
- case spv::OpTypeInt:
- // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
- // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
- return 1;
+ case spv::OpTypeBool:
+ case spv::OpTypeFloat:
+ case spv::OpTypeInt:
+ // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
+ // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
+ return 1;
- case spv::OpTypeVector:
- case spv::OpTypeMatrix:
- // Vectors and matrices both consume element count * element size.
- return getType(insn.word(2)).sizeInComponents * insn.word(3);
+ case spv::OpTypeVector:
+ case spv::OpTypeMatrix:
+ // Vectors and matrices both consume element count * element size.
+ return getType(insn.word(2)).sizeInComponents * insn.word(3);
- case spv::OpTypeArray:
- {
- // Element count * element size. Array sizes come from constant ids.
- auto arraySize = GetConstScalarInt(insn.word(3));
- return getType(insn.word(2)).sizeInComponents * arraySize;
- }
-
- case spv::OpTypeStruct:
- {
- uint32_t size = 0;
- for(uint32_t i = 2u; i < insn.wordCount(); i++)
+ case spv::OpTypeArray:
{
- size += getType(insn.word(i)).sizeInComponents;
+ // Element count * element size. Array sizes come from constant ids.
+ auto arraySize = GetConstScalarInt(insn.word(3));
+ return getType(insn.word(2)).sizeInComponents * arraySize;
}
- return size;
- }
- case spv::OpTypePointer:
- // Runtime representation of a pointer is a per-lane index.
- // Note: clients are expected to look through the pointer if they want the pointee size instead.
- return 1;
+ case spv::OpTypeStruct:
+ {
+ uint32_t size = 0;
+ for(uint32_t i = 2u; i < insn.wordCount(); i++)
+ {
+ size += getType(insn.word(i)).sizeInComponents;
+ }
+ return size;
+ }
- default:
- UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
- return 0;
+ case spv::OpTypePointer:
+ // Runtime representation of a pointer is a per-lane index.
+ // Note: clients are expected to look through the pointer if they want the pointee size instead.
+ return 1;
+
+ default:
+ UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+ return 0;
}
}
@@ -924,54 +927,54 @@
auto const &obj = getType(id);
switch(obj.opcode())
{
- case spv::OpTypePointer:
- return VisitInterfaceInner(obj.definition.word(3), d, f);
- case spv::OpTypeMatrix:
- for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
+ case spv::OpTypePointer:
+ return VisitInterfaceInner(obj.definition.word(3), d, f);
+ case spv::OpTypeMatrix:
+ for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
+ {
+ // consumes same components of N consecutive locations
+ VisitInterfaceInner(obj.definition.word(2), d, f);
+ }
+ return d.Location;
+ case spv::OpTypeVector:
+ for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
+ {
+ // consumes N consecutive components in the same location
+ VisitInterfaceInner(obj.definition.word(2), d, f);
+ }
+ return d.Location + 1;
+ case spv::OpTypeFloat:
+ f(d, ATTRIBTYPE_FLOAT);
+ return d.Location + 1;
+ case spv::OpTypeInt:
+ f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
+ return d.Location + 1;
+ case spv::OpTypeBool:
+ f(d, ATTRIBTYPE_UINT);
+ return d.Location + 1;
+ case spv::OpTypeStruct:
{
- // consumes same components of N consecutive locations
- VisitInterfaceInner(obj.definition.word(2), d, f);
+ // iterate over members, which may themselves have Location/Component decorations
+ for(auto i = 0u; i < obj.definition.wordCount() - 2; i++)
+ {
+ ApplyDecorationsForIdMember(&d, id, i);
+ d.Location = VisitInterfaceInner(obj.definition.word(i + 2), d, f);
+ d.Component = 0; // Implicit locations always have component=0
+ }
+ return d.Location;
}
- return d.Location;
- case spv::OpTypeVector:
- for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
+ case spv::OpTypeArray:
{
- // consumes N consecutive components in the same location
- VisitInterfaceInner(obj.definition.word(2), d, f);
+ auto arraySize = GetConstScalarInt(obj.definition.word(3));
+ for(auto i = 0u; i < arraySize; i++)
+ {
+ d.Location = VisitInterfaceInner(obj.definition.word(2), d, f);
+ }
+ return d.Location;
}
- return d.Location + 1;
- case spv::OpTypeFloat:
- f(d, ATTRIBTYPE_FLOAT);
- return d.Location + 1;
- case spv::OpTypeInt:
- f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
- return d.Location + 1;
- case spv::OpTypeBool:
- f(d, ATTRIBTYPE_UINT);
- return d.Location + 1;
- case spv::OpTypeStruct:
- {
- // iterate over members, which may themselves have Location/Component decorations
- for(auto i = 0u; i < obj.definition.wordCount() - 2; i++)
- {
- ApplyDecorationsForIdMember(&d, id, i);
- d.Location = VisitInterfaceInner(obj.definition.word(i + 2), d, f);
- d.Component = 0; // Implicit locations always have component=0
- }
- return d.Location;
- }
- case spv::OpTypeArray:
- {
- auto arraySize = GetConstScalarInt(obj.definition.word(3));
- for(auto i = 0u; i < arraySize; i++)
- {
- d.Location = VisitInterfaceInner(obj.definition.word(2), d, f);
- }
- return d.Location;
- }
- default:
- // Intentionally partial; most opcodes do not participate in type hierarchies
- return 0;
+ default:
+ // Intentionally partial; most opcodes do not participate in type hierarchies
+ return 0;
}
}
@@ -996,33 +999,33 @@
for(auto i = 0u; i < numIndexes; i++)
{
ApplyDecorationsForId(d, typeId);
- auto & type = getType(typeId);
+ auto &type = getType(typeId);
switch(type.opcode())
{
- case spv::OpTypeStruct:
- {
- int memberIndex = GetConstScalarInt(indexIds[i]);
- ApplyDecorationsForIdMember(d, typeId, memberIndex);
- typeId = type.definition.word(2u + memberIndex);
- break;
- }
- case spv::OpTypeArray:
- case spv::OpTypeRuntimeArray:
- if(dd->InputAttachmentIndex >= 0)
+ case spv::OpTypeStruct:
{
- dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
+ int memberIndex = GetConstScalarInt(indexIds[i]);
+ ApplyDecorationsForIdMember(d, typeId, memberIndex);
+ typeId = type.definition.word(2u + memberIndex);
+ break;
}
- typeId = type.element;
- break;
- case spv::OpTypeVector:
- typeId = type.element;
- break;
- case spv::OpTypeMatrix:
- typeId = type.element;
- d->InsideMatrix = true;
- break;
- default:
- UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray:
+ if(dd->InputAttachmentIndex >= 0)
+ {
+ dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
+ }
+ typeId = type.element;
+ break;
+ case spv::OpTypeVector:
+ typeId = type.element;
+ break;
+ case spv::OpTypeMatrix:
+ typeId = type.element;
+ d->InsideMatrix = true;
+ break;
+ default:
+ UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
}
}
}
@@ -1057,72 +1060,72 @@
for(auto i = 0u; i < numIndexes; i++)
{
- auto & type = getType(typeId);
+ auto &type = getType(typeId);
ApplyDecorationsForId(&d, typeId);
switch(type.definition.opcode())
{
- case spv::OpTypeStruct:
- {
- int memberIndex = GetConstScalarInt(indexIds[i]);
- ApplyDecorationsForIdMember(&d, typeId, memberIndex);
- ASSERT(d.HasOffset);
- constantOffset += d.Offset;
- typeId = type.definition.word(2u + memberIndex);
- break;
- }
- case spv::OpTypeArray:
- case spv::OpTypeRuntimeArray:
- {
- // TODO: b/127950082: Check bounds.
- ASSERT(d.HasArrayStride);
- auto & obj = getObject(indexIds[i]);
- if(obj.kind == Object::Kind::Constant)
+ case spv::OpTypeStruct:
{
- constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
+ int memberIndex = GetConstScalarInt(indexIds[i]);
+ ApplyDecorationsForIdMember(&d, typeId, memberIndex);
+ ASSERT(d.HasOffset);
+ constantOffset += d.Offset;
+ typeId = type.definition.word(2u + memberIndex);
+ break;
}
- else
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray:
{
- ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
+ // TODO: b/127950082: Check bounds.
+ ASSERT(d.HasArrayStride);
+ auto &obj = getObject(indexIds[i]);
+ if(obj.kind == Object::Kind::Constant)
+ {
+ constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
+ }
+ else
+ {
+ ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
+ }
+ typeId = type.element;
+ break;
}
- typeId = type.element;
- break;
- }
- case spv::OpTypeMatrix:
- {
- // TODO: b/127950082: Check bounds.
- ASSERT(d.HasMatrixStride);
- d.InsideMatrix = true;
- auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
- auto & obj = getObject(indexIds[i]);
- if(obj.kind == Object::Kind::Constant)
+ case spv::OpTypeMatrix:
{
- constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
+ // TODO: b/127950082: Check bounds.
+ ASSERT(d.HasMatrixStride);
+ d.InsideMatrix = true;
+ auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
+ auto &obj = getObject(indexIds[i]);
+ if(obj.kind == Object::Kind::Constant)
+ {
+ constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
+ }
+ else
+ {
+ ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
+ }
+ typeId = type.element;
+ break;
}
- else
+ case spv::OpTypeVector:
{
- ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
+ auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
+ auto &obj = getObject(indexIds[i]);
+ if(obj.kind == Object::Kind::Constant)
+ {
+ constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
+ }
+ else
+ {
+ ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
+ }
+ typeId = type.element;
+ break;
}
- typeId = type.element;
- break;
- }
- case spv::OpTypeVector:
- {
- auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
- auto & obj = getObject(indexIds[i]);
- if(obj.kind == Object::Kind::Constant)
- {
- constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
- }
- else
- {
- ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
- }
- typeId = type.element;
- break;
- }
- default:
- UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
+ default:
+ UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
}
}
@@ -1143,63 +1146,64 @@
for(auto i = 0u; i < numIndexes; i++)
{
- auto & type = getType(typeId);
+ auto &type = getType(typeId);
switch(type.opcode())
{
- case spv::OpTypeStruct:
- {
- int memberIndex = GetConstScalarInt(indexIds[i]);
- int offsetIntoStruct = 0;
- for(auto j = 0; j < memberIndex; j++) {
- auto memberType = type.definition.word(2u + j);
- offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
- }
- constantOffset += offsetIntoStruct;
- typeId = type.definition.word(2u + memberIndex);
- break;
- }
-
- case spv::OpTypeVector:
- case spv::OpTypeMatrix:
- case spv::OpTypeArray:
- case spv::OpTypeRuntimeArray:
- {
- // TODO: b/127950082: Check bounds.
- if(getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
+ case spv::OpTypeStruct:
{
- // indexing into an array of descriptors.
- auto &obj = getObject(indexIds[i]);
- if(obj.kind != Object::Kind::Constant)
+ int memberIndex = GetConstScalarInt(indexIds[i]);
+ int offsetIntoStruct = 0;
+ for(auto j = 0; j < memberIndex; j++)
{
- UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
+ auto memberType = type.definition.word(2u + j);
+ offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
}
-
- auto d = descriptorDecorations.at(baseId);
- ASSERT(d.DescriptorSet >= 0);
- ASSERT(d.Binding >= 0);
- auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
- auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding));
- ptr.base += stride * GetConstScalarInt(indexIds[i]);
+ constantOffset += offsetIntoStruct;
+ typeId = type.definition.word(2u + memberIndex);
+ break;
}
- else
+
+ case spv::OpTypeVector:
+ case spv::OpTypeMatrix:
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray:
{
- auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float));
- auto & obj = getObject(indexIds[i]);
- if(obj.kind == Object::Kind::Constant)
+ // TODO: b/127950082: Check bounds.
+ if(getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
{
- ptr += stride * GetConstScalarInt(indexIds[i]);
+ // indexing into an array of descriptors.
+ auto &obj = getObject(indexIds[i]);
+ if(obj.kind != Object::Kind::Constant)
+ {
+ UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
+ }
+
+ auto d = descriptorDecorations.at(baseId);
+ ASSERT(d.DescriptorSet >= 0);
+ ASSERT(d.Binding >= 0);
+ auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
+ auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding));
+ ptr.base += stride * GetConstScalarInt(indexIds[i]);
}
else
{
- ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
+ auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float));
+ auto &obj = getObject(indexIds[i]);
+ if(obj.kind == Object::Kind::Constant)
+ {
+ ptr += stride * GetConstScalarInt(indexIds[i]);
+ }
+ else
+ {
+ ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
+ }
}
+ typeId = type.element;
+ break;
}
- typeId = type.element;
- break;
- }
- default:
- UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
+ default:
+ UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
}
}
@@ -1216,35 +1220,36 @@
for(auto i = 0u; i < numIndexes; i++)
{
- auto & type = getType(typeId);
+ auto &type = getType(typeId);
switch(type.opcode())
{
- case spv::OpTypeStruct:
- {
- int memberIndex = indexes[i];
- int offsetIntoStruct = 0;
- for(auto j = 0; j < memberIndex; j++) {
- auto memberType = type.definition.word(2u + j);
- offsetIntoStruct += getType(memberType).sizeInComponents;
+ case spv::OpTypeStruct:
+ {
+ int memberIndex = indexes[i];
+ int offsetIntoStruct = 0;
+ for(auto j = 0; j < memberIndex; j++)
+ {
+ auto memberType = type.definition.word(2u + j);
+ offsetIntoStruct += getType(memberType).sizeInComponents;
+ }
+ componentOffset += offsetIntoStruct;
+ typeId = type.definition.word(2u + memberIndex);
+ break;
}
- componentOffset += offsetIntoStruct;
- typeId = type.definition.word(2u + memberIndex);
- break;
- }
- case spv::OpTypeVector:
- case spv::OpTypeMatrix:
- case spv::OpTypeArray:
- {
- auto elementType = type.definition.word(2);
- auto stride = getType(elementType).sizeInComponents;
- componentOffset += stride * indexes[i];
- typeId = elementType;
- break;
- }
+ case spv::OpTypeVector:
+ case spv::OpTypeMatrix:
+ case spv::OpTypeArray:
+ {
+ auto elementType = type.definition.word(2);
+ auto stride = getType(elementType).sizeInComponents;
+ componentOffset += stride * indexes[i];
+ typeId = elementType;
+ break;
+ }
- default:
- UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
+ default:
+ UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
}
}
@@ -1255,58 +1260,58 @@
{
switch(decoration)
{
- case spv::DecorationLocation:
- HasLocation = true;
- Location = static_cast<int32_t>(arg);
- break;
- case spv::DecorationComponent:
- HasComponent = true;
- Component = arg;
- break;
- case spv::DecorationBuiltIn:
- HasBuiltIn = true;
- BuiltIn = static_cast<spv::BuiltIn>(arg);
- break;
- case spv::DecorationFlat:
- Flat = true;
- break;
- case spv::DecorationNoPerspective:
- NoPerspective = true;
- break;
- case spv::DecorationCentroid:
- Centroid = true;
- break;
- case spv::DecorationBlock:
- Block = true;
- break;
- case spv::DecorationBufferBlock:
- BufferBlock = true;
- break;
- case spv::DecorationOffset:
- HasOffset = true;
- Offset = static_cast<int32_t>(arg);
- break;
- case spv::DecorationArrayStride:
- HasArrayStride = true;
- ArrayStride = static_cast<int32_t>(arg);
- break;
- case spv::DecorationMatrixStride:
- HasMatrixStride = true;
- MatrixStride = static_cast<int32_t>(arg);
- break;
- case spv::DecorationRelaxedPrecision:
- RelaxedPrecision = true;
- break;
- case spv::DecorationRowMajor:
- HasRowMajor = true;
- RowMajor = true;
- break;
- case spv::DecorationColMajor:
- HasRowMajor = true;
- RowMajor = false;
- default:
- // Intentionally partial, there are many decorations we just don't care about.
- break;
+ case spv::DecorationLocation:
+ HasLocation = true;
+ Location = static_cast<int32_t>(arg);
+ break;
+ case spv::DecorationComponent:
+ HasComponent = true;
+ Component = arg;
+ break;
+ case spv::DecorationBuiltIn:
+ HasBuiltIn = true;
+ BuiltIn = static_cast<spv::BuiltIn>(arg);
+ break;
+ case spv::DecorationFlat:
+ Flat = true;
+ break;
+ case spv::DecorationNoPerspective:
+ NoPerspective = true;
+ break;
+ case spv::DecorationCentroid:
+ Centroid = true;
+ break;
+ case spv::DecorationBlock:
+ Block = true;
+ break;
+ case spv::DecorationBufferBlock:
+ BufferBlock = true;
+ break;
+ case spv::DecorationOffset:
+ HasOffset = true;
+ Offset = static_cast<int32_t>(arg);
+ break;
+ case spv::DecorationArrayStride:
+ HasArrayStride = true;
+ ArrayStride = static_cast<int32_t>(arg);
+ break;
+ case spv::DecorationMatrixStride:
+ HasMatrixStride = true;
+ MatrixStride = static_cast<int32_t>(arg);
+ break;
+ case spv::DecorationRelaxedPrecision:
+ RelaxedPrecision = true;
+ break;
+ case spv::DecorationRowMajor:
+ HasRowMajor = true;
+ RowMajor = true;
+ break;
+ case spv::DecorationColMajor:
+ HasRowMajor = true;
+ RowMajor = false;
+ default:
+ // Intentionally partial, there are many decorations we just don't care about.
+ break;
}
}
@@ -1407,15 +1412,15 @@
switch(getType(typeId).opcode())
{
- case spv::OpTypePointer:
- case spv::OpTypeImage:
- case spv::OpTypeSampledImage:
- case spv::OpTypeSampler:
- object.kind = Object::Kind::Pointer;
- break;
+ case spv::OpTypePointer:
+ case spv::OpTypeImage:
+ case spv::OpTypeSampledImage:
+ case spv::OpTypeSampler:
+ object.kind = Object::Kind::Pointer;
+ break;
- default:
- object.kind = Object::Kind::Intermediate;
+ default:
+ object.kind = Object::Kind::Intermediate;
}
object.definition = insn;
@@ -1425,27 +1430,27 @@
{
switch(storageClass)
{
- case spv::StorageClassUniform:
- case spv::StorageClassStorageBuffer:
- // Buffer resource access. robustBufferAccess feature applies.
- return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
- : OutOfBoundsBehavior::UndefinedBehavior;
-
- case spv::StorageClassImage:
- return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined"
-
- case spv::StorageClassInput:
- if(executionModel == spv::ExecutionModelVertex)
- {
- // Vertex attributes follow robustBufferAccess rules.
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ // Buffer resource access. robustBufferAccess feature applies.
return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
: OutOfBoundsBehavior::UndefinedBehavior;
- }
- // Fall through to default case.
- default:
- // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
- // TODO(b/131224163): Optimize cases statically known to be within bounds.
- return OutOfBoundsBehavior::UndefinedValue;
+
+ case spv::StorageClassImage:
+ return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined"
+
+ case spv::StorageClassInput:
+ if(executionModel == spv::ExecutionModelVertex)
+ {
+ // Vertex attributes follow robustBufferAccess rules.
+ return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+ : OutOfBoundsBehavior::UndefinedBehavior;
+ }
+ // Fall through to default case.
+ default:
+ // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
+ // TODO(b/131224163): Optimize cases statically known to be within bounds.
+ return OutOfBoundsBehavior::UndefinedValue;
}
return OutOfBoundsBehavior::Nullify;
@@ -1459,48 +1464,48 @@
{
switch(insn.opcode())
{
- case spv::OpVariable:
- {
- Type::ID resultPointerTypeId = insn.word(1);
- auto resultPointerType = getType(resultPointerTypeId);
- auto pointeeType = getType(resultPointerType.element);
+ case spv::OpVariable:
+ {
+ Type::ID resultPointerTypeId = insn.word(1);
+ auto resultPointerType = getType(resultPointerTypeId);
+ auto pointeeType = getType(resultPointerType.element);
- if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
+ if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
+ {
+ Object::ID resultId = insn.word(2);
+ routine->createVariable(resultId, pointeeType.sizeInComponents);
+ }
+ break;
+ }
+ case spv::OpPhi:
+ {
+ auto type = getType(insn.word(1));
+ Object::ID resultId = insn.word(2);
+ routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
+ break;
+ }
+
+ case spv::OpImageDrefGather:
+ case spv::OpImageFetch:
+ case spv::OpImageGather:
+ case spv::OpImageQueryLod:
+ case spv::OpImageSampleDrefExplicitLod:
+ case spv::OpImageSampleDrefImplicitLod:
+ case spv::OpImageSampleExplicitLod:
+ case spv::OpImageSampleImplicitLod:
+ case spv::OpImageSampleProjDrefExplicitLod:
+ case spv::OpImageSampleProjDrefImplicitLod:
+ case spv::OpImageSampleProjExplicitLod:
+ case spv::OpImageSampleProjImplicitLod:
{
Object::ID resultId = insn.word(2);
- routine->createVariable(resultId, pointeeType.sizeInComponents);
+ routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
+ break;
}
- break;
- }
- case spv::OpPhi:
- {
- auto type = getType(insn.word(1));
- Object::ID resultId = insn.word(2);
- routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
- break;
- }
- case spv::OpImageDrefGather:
- case spv::OpImageFetch:
- case spv::OpImageGather:
- case spv::OpImageQueryLod:
- case spv::OpImageSampleDrefExplicitLod:
- case spv::OpImageSampleDrefImplicitLod:
- case spv::OpImageSampleExplicitLod:
- case spv::OpImageSampleImplicitLod:
- case spv::OpImageSampleProjDrefExplicitLod:
- case spv::OpImageSampleProjDrefImplicitLod:
- case spv::OpImageSampleProjExplicitLod:
- case spv::OpImageSampleProjImplicitLod:
- {
- Object::ID resultId = insn.word(2);
- routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
- break;
- }
-
- default:
- // Nothing else produces interface variables, so can all be safely ignored.
- break;
+ default:
+ // Nothing else produces interface variables, so can all be safely ignored.
+ break;
}
}
}
@@ -1531,13 +1536,13 @@
auto res = EmitInstruction(insn, state);
switch(res)
{
- case EmitResult::Continue:
- continue;
- case EmitResult::Terminator:
- break;
- default:
- UNREACHABLE("Unexpected EmitResult %d", int(res));
- break;
+ case EmitResult::Continue:
+ continue;
+ case EmitResult::Terminator:
+ break;
+ default:
+ UNREACHABLE("Unexpected EmitResult %d", int(res));
+ break;
}
}
}
@@ -1548,365 +1553,365 @@
switch(opcode)
{
- case spv::OpTypeVoid:
- case spv::OpTypeInt:
- case spv::OpTypeFloat:
- case spv::OpTypeBool:
- case spv::OpTypeVector:
- case spv::OpTypeArray:
- case spv::OpTypeRuntimeArray:
- case spv::OpTypeMatrix:
- case spv::OpTypeStruct:
- case spv::OpTypePointer:
- case spv::OpTypeFunction:
- case spv::OpTypeImage:
- case spv::OpTypeSampledImage:
- case spv::OpTypeSampler:
- case spv::OpExecutionMode:
- case spv::OpMemoryModel:
- case spv::OpFunction:
- case spv::OpFunctionEnd:
- case spv::OpConstant:
- case spv::OpConstantNull:
- case spv::OpConstantTrue:
- case spv::OpConstantFalse:
- case spv::OpConstantComposite:
- case spv::OpSpecConstant:
- case spv::OpSpecConstantTrue:
- case spv::OpSpecConstantFalse:
- case spv::OpSpecConstantComposite:
- case spv::OpSpecConstantOp:
- case spv::OpUndef:
- case spv::OpExtension:
- case spv::OpCapability:
- case spv::OpEntryPoint:
- case spv::OpExtInstImport:
- case spv::OpDecorate:
- case spv::OpMemberDecorate:
- case spv::OpGroupDecorate:
- case spv::OpGroupMemberDecorate:
- case spv::OpDecorationGroup:
- case spv::OpName:
- case spv::OpMemberName:
- case spv::OpSource:
- case spv::OpSourceContinued:
- case spv::OpSourceExtension:
- case spv::OpLine:
- case spv::OpNoLine:
- case spv::OpModuleProcessed:
- case spv::OpString:
- // Nothing to do at emit time. These are either fully handled at analysis time,
- // or don't require any work at all.
- return EmitResult::Continue;
+ case spv::OpTypeVoid:
+ case spv::OpTypeInt:
+ case spv::OpTypeFloat:
+ case spv::OpTypeBool:
+ case spv::OpTypeVector:
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray:
+ case spv::OpTypeMatrix:
+ case spv::OpTypeStruct:
+ case spv::OpTypePointer:
+ case spv::OpTypeFunction:
+ case spv::OpTypeImage:
+ case spv::OpTypeSampledImage:
+ case spv::OpTypeSampler:
+ case spv::OpExecutionMode:
+ case spv::OpMemoryModel:
+ case spv::OpFunction:
+ case spv::OpFunctionEnd:
+ case spv::OpConstant:
+ case spv::OpConstantNull:
+ case spv::OpConstantTrue:
+ case spv::OpConstantFalse:
+ case spv::OpConstantComposite:
+ case spv::OpSpecConstant:
+ case spv::OpSpecConstantTrue:
+ case spv::OpSpecConstantFalse:
+ case spv::OpSpecConstantComposite:
+ case spv::OpSpecConstantOp:
+ case spv::OpUndef:
+ case spv::OpExtension:
+ case spv::OpCapability:
+ case spv::OpEntryPoint:
+ case spv::OpExtInstImport:
+ case spv::OpDecorate:
+ case spv::OpMemberDecorate:
+ case spv::OpGroupDecorate:
+ case spv::OpGroupMemberDecorate:
+ case spv::OpDecorationGroup:
+ case spv::OpName:
+ case spv::OpMemberName:
+ case spv::OpSource:
+ case spv::OpSourceContinued:
+ case spv::OpSourceExtension:
+ case spv::OpLine:
+ case spv::OpNoLine:
+ case spv::OpModuleProcessed:
+ case spv::OpString:
+ // Nothing to do at emit time. These are either fully handled at analysis time,
+ // or don't require any work at all.
+ return EmitResult::Continue;
- case spv::OpLabel:
- return EmitResult::Continue;
+ case spv::OpLabel:
+ return EmitResult::Continue;
- case spv::OpVariable:
- return EmitVariable(insn, state);
+ case spv::OpVariable:
+ return EmitVariable(insn, state);
- case spv::OpLoad:
- case spv::OpAtomicLoad:
- return EmitLoad(insn, state);
+ case spv::OpLoad:
+ case spv::OpAtomicLoad:
+ return EmitLoad(insn, state);
- case spv::OpStore:
- case spv::OpAtomicStore:
- return EmitStore(insn, state);
+ case spv::OpStore:
+ case spv::OpAtomicStore:
+ return EmitStore(insn, state);
- case spv::OpAtomicIAdd:
- case spv::OpAtomicISub:
- case spv::OpAtomicSMin:
- case spv::OpAtomicSMax:
- case spv::OpAtomicUMin:
- case spv::OpAtomicUMax:
- case spv::OpAtomicAnd:
- case spv::OpAtomicOr:
- case spv::OpAtomicXor:
- case spv::OpAtomicIIncrement:
- case spv::OpAtomicIDecrement:
- case spv::OpAtomicExchange:
- return EmitAtomicOp(insn, state);
+ case spv::OpAtomicIAdd:
+ case spv::OpAtomicISub:
+ case spv::OpAtomicSMin:
+ case spv::OpAtomicSMax:
+ case spv::OpAtomicUMin:
+ case spv::OpAtomicUMax:
+ case spv::OpAtomicAnd:
+ case spv::OpAtomicOr:
+ case spv::OpAtomicXor:
+ case spv::OpAtomicIIncrement:
+ case spv::OpAtomicIDecrement:
+ case spv::OpAtomicExchange:
+ return EmitAtomicOp(insn, state);
- case spv::OpAtomicCompareExchange:
- return EmitAtomicCompareExchange(insn, state);
+ case spv::OpAtomicCompareExchange:
+ return EmitAtomicCompareExchange(insn, state);
- case spv::OpAccessChain:
- case spv::OpInBoundsAccessChain:
- return EmitAccessChain(insn, state);
+ case spv::OpAccessChain:
+ case spv::OpInBoundsAccessChain:
+ return EmitAccessChain(insn, state);
- case spv::OpCompositeConstruct:
- return EmitCompositeConstruct(insn, state);
+ case spv::OpCompositeConstruct:
+ return EmitCompositeConstruct(insn, state);
- case spv::OpCompositeInsert:
- return EmitCompositeInsert(insn, state);
+ case spv::OpCompositeInsert:
+ return EmitCompositeInsert(insn, state);
- case spv::OpCompositeExtract:
- return EmitCompositeExtract(insn, state);
+ case spv::OpCompositeExtract:
+ return EmitCompositeExtract(insn, state);
- case spv::OpVectorShuffle:
- return EmitVectorShuffle(insn, state);
+ case spv::OpVectorShuffle:
+ return EmitVectorShuffle(insn, state);
- case spv::OpVectorExtractDynamic:
- return EmitVectorExtractDynamic(insn, state);
+ case spv::OpVectorExtractDynamic:
+ return EmitVectorExtractDynamic(insn, state);
- case spv::OpVectorInsertDynamic:
- return EmitVectorInsertDynamic(insn, state);
+ case spv::OpVectorInsertDynamic:
+ return EmitVectorInsertDynamic(insn, state);
- case spv::OpVectorTimesScalar:
- case spv::OpMatrixTimesScalar:
- return EmitVectorTimesScalar(insn, state);
+ case spv::OpVectorTimesScalar:
+ case spv::OpMatrixTimesScalar:
+ return EmitVectorTimesScalar(insn, state);
- case spv::OpMatrixTimesVector:
- return EmitMatrixTimesVector(insn, state);
+ case spv::OpMatrixTimesVector:
+ return EmitMatrixTimesVector(insn, state);
- case spv::OpVectorTimesMatrix:
- return EmitVectorTimesMatrix(insn, state);
+ case spv::OpVectorTimesMatrix:
+ return EmitVectorTimesMatrix(insn, state);
- case spv::OpMatrixTimesMatrix:
- return EmitMatrixTimesMatrix(insn, state);
+ case spv::OpMatrixTimesMatrix:
+ return EmitMatrixTimesMatrix(insn, state);
- case spv::OpOuterProduct:
- return EmitOuterProduct(insn, state);
+ case spv::OpOuterProduct:
+ return EmitOuterProduct(insn, state);
- case spv::OpTranspose:
- return EmitTranspose(insn, state);
+ case spv::OpTranspose:
+ return EmitTranspose(insn, state);
- case spv::OpNot:
- case spv::OpBitFieldInsert:
- case spv::OpBitFieldSExtract:
- case spv::OpBitFieldUExtract:
- case spv::OpBitReverse:
- case spv::OpBitCount:
- case spv::OpSNegate:
- case spv::OpFNegate:
- case spv::OpLogicalNot:
- case spv::OpConvertFToU:
- case spv::OpConvertFToS:
- case spv::OpConvertSToF:
- case spv::OpConvertUToF:
- case spv::OpBitcast:
- case spv::OpIsInf:
- case spv::OpIsNan:
- case spv::OpDPdx:
- case spv::OpDPdxCoarse:
- case spv::OpDPdy:
- case spv::OpDPdyCoarse:
- case spv::OpFwidth:
- case spv::OpFwidthCoarse:
- case spv::OpDPdxFine:
- case spv::OpDPdyFine:
- case spv::OpFwidthFine:
- case spv::OpQuantizeToF16:
- return EmitUnaryOp(insn, state);
+ case spv::OpNot:
+ case spv::OpBitFieldInsert:
+ case spv::OpBitFieldSExtract:
+ case spv::OpBitFieldUExtract:
+ case spv::OpBitReverse:
+ case spv::OpBitCount:
+ case spv::OpSNegate:
+ case spv::OpFNegate:
+ case spv::OpLogicalNot:
+ case spv::OpConvertFToU:
+ case spv::OpConvertFToS:
+ case spv::OpConvertSToF:
+ case spv::OpConvertUToF:
+ case spv::OpBitcast:
+ case spv::OpIsInf:
+ case spv::OpIsNan:
+ case spv::OpDPdx:
+ case spv::OpDPdxCoarse:
+ case spv::OpDPdy:
+ case spv::OpDPdyCoarse:
+ case spv::OpFwidth:
+ case spv::OpFwidthCoarse:
+ case spv::OpDPdxFine:
+ case spv::OpDPdyFine:
+ case spv::OpFwidthFine:
+ case spv::OpQuantizeToF16:
+ return EmitUnaryOp(insn, state);
- case spv::OpIAdd:
- case spv::OpISub:
- case spv::OpIMul:
- case spv::OpSDiv:
- case spv::OpUDiv:
- case spv::OpFAdd:
- case spv::OpFSub:
- case spv::OpFMul:
- case spv::OpFDiv:
- case spv::OpFMod:
- case spv::OpFRem:
- case spv::OpFOrdEqual:
- case spv::OpFUnordEqual:
- case spv::OpFOrdNotEqual:
- case spv::OpFUnordNotEqual:
- case spv::OpFOrdLessThan:
- case spv::OpFUnordLessThan:
- case spv::OpFOrdGreaterThan:
- case spv::OpFUnordGreaterThan:
- case spv::OpFOrdLessThanEqual:
- case spv::OpFUnordLessThanEqual:
- case spv::OpFOrdGreaterThanEqual:
- case spv::OpFUnordGreaterThanEqual:
- case spv::OpSMod:
- case spv::OpSRem:
- case spv::OpUMod:
- case spv::OpIEqual:
- case spv::OpINotEqual:
- case spv::OpUGreaterThan:
- case spv::OpSGreaterThan:
- case spv::OpUGreaterThanEqual:
- case spv::OpSGreaterThanEqual:
- case spv::OpULessThan:
- case spv::OpSLessThan:
- case spv::OpULessThanEqual:
- case spv::OpSLessThanEqual:
- case spv::OpShiftRightLogical:
- case spv::OpShiftRightArithmetic:
- case spv::OpShiftLeftLogical:
- case spv::OpBitwiseOr:
- case spv::OpBitwiseXor:
- case spv::OpBitwiseAnd:
- case spv::OpLogicalOr:
- case spv::OpLogicalAnd:
- case spv::OpLogicalEqual:
- case spv::OpLogicalNotEqual:
- case spv::OpUMulExtended:
- case spv::OpSMulExtended:
- case spv::OpIAddCarry:
- case spv::OpISubBorrow:
- return EmitBinaryOp(insn, state);
+ case spv::OpIAdd:
+ case spv::OpISub:
+ case spv::OpIMul:
+ case spv::OpSDiv:
+ case spv::OpUDiv:
+ case spv::OpFAdd:
+ case spv::OpFSub:
+ case spv::OpFMul:
+ case spv::OpFDiv:
+ case spv::OpFMod:
+ case spv::OpFRem:
+ case spv::OpFOrdEqual:
+ case spv::OpFUnordEqual:
+ case spv::OpFOrdNotEqual:
+ case spv::OpFUnordNotEqual:
+ case spv::OpFOrdLessThan:
+ case spv::OpFUnordLessThan:
+ case spv::OpFOrdGreaterThan:
+ case spv::OpFUnordGreaterThan:
+ case spv::OpFOrdLessThanEqual:
+ case spv::OpFUnordLessThanEqual:
+ case spv::OpFOrdGreaterThanEqual:
+ case spv::OpFUnordGreaterThanEqual:
+ case spv::OpSMod:
+ case spv::OpSRem:
+ case spv::OpUMod:
+ case spv::OpIEqual:
+ case spv::OpINotEqual:
+ case spv::OpUGreaterThan:
+ case spv::OpSGreaterThan:
+ case spv::OpUGreaterThanEqual:
+ case spv::OpSGreaterThanEqual:
+ case spv::OpULessThan:
+ case spv::OpSLessThan:
+ case spv::OpULessThanEqual:
+ case spv::OpSLessThanEqual:
+ case spv::OpShiftRightLogical:
+ case spv::OpShiftRightArithmetic:
+ case spv::OpShiftLeftLogical:
+ case spv::OpBitwiseOr:
+ case spv::OpBitwiseXor:
+ case spv::OpBitwiseAnd:
+ case spv::OpLogicalOr:
+ case spv::OpLogicalAnd:
+ case spv::OpLogicalEqual:
+ case spv::OpLogicalNotEqual:
+ case spv::OpUMulExtended:
+ case spv::OpSMulExtended:
+ case spv::OpIAddCarry:
+ case spv::OpISubBorrow:
+ return EmitBinaryOp(insn, state);
- case spv::OpDot:
- return EmitDot(insn, state);
+ case spv::OpDot:
+ return EmitDot(insn, state);
- case spv::OpSelect:
- return EmitSelect(insn, state);
+ case spv::OpSelect:
+ return EmitSelect(insn, state);
- case spv::OpExtInst:
- return EmitExtendedInstruction(insn, state);
+ case spv::OpExtInst:
+ return EmitExtendedInstruction(insn, state);
- case spv::OpAny:
- return EmitAny(insn, state);
+ case spv::OpAny:
+ return EmitAny(insn, state);
- case spv::OpAll:
- return EmitAll(insn, state);
+ case spv::OpAll:
+ return EmitAll(insn, state);
- case spv::OpBranch:
- return EmitBranch(insn, state);
+ case spv::OpBranch:
+ return EmitBranch(insn, state);
- case spv::OpPhi:
- return EmitPhi(insn, state);
+ case spv::OpPhi:
+ return EmitPhi(insn, state);
- case spv::OpSelectionMerge:
- case spv::OpLoopMerge:
- return EmitResult::Continue;
+ case spv::OpSelectionMerge:
+ case spv::OpLoopMerge:
+ return EmitResult::Continue;
- case spv::OpBranchConditional:
- return EmitBranchConditional(insn, state);
+ case spv::OpBranchConditional:
+ return EmitBranchConditional(insn, state);
- case spv::OpSwitch:
- return EmitSwitch(insn, state);
+ case spv::OpSwitch:
+ return EmitSwitch(insn, state);
- case spv::OpUnreachable:
- return EmitUnreachable(insn, state);
+ case spv::OpUnreachable:
+ return EmitUnreachable(insn, state);
- case spv::OpReturn:
- return EmitReturn(insn, state);
+ case spv::OpReturn:
+ return EmitReturn(insn, state);
- case spv::OpFunctionCall:
- return EmitFunctionCall(insn, state);
+ case spv::OpFunctionCall:
+ return EmitFunctionCall(insn, state);
- case spv::OpKill:
- return EmitKill(insn, state);
+ case spv::OpKill:
+ return EmitKill(insn, state);
- case spv::OpImageSampleImplicitLod:
- return EmitImageSampleImplicitLod(None, insn, state);
+ case spv::OpImageSampleImplicitLod:
+ return EmitImageSampleImplicitLod(None, insn, state);
- case spv::OpImageSampleExplicitLod:
- return EmitImageSampleExplicitLod(None, insn, state);
+ case spv::OpImageSampleExplicitLod:
+ return EmitImageSampleExplicitLod(None, insn, state);
- case spv::OpImageSampleDrefImplicitLod:
- return EmitImageSampleImplicitLod(Dref, insn, state);
+ case spv::OpImageSampleDrefImplicitLod:
+ return EmitImageSampleImplicitLod(Dref, insn, state);
- case spv::OpImageSampleDrefExplicitLod:
- return EmitImageSampleExplicitLod(Dref, insn, state);
+ case spv::OpImageSampleDrefExplicitLod:
+ return EmitImageSampleExplicitLod(Dref, insn, state);
- case spv::OpImageSampleProjImplicitLod:
- return EmitImageSampleImplicitLod(Proj, insn, state);
+ case spv::OpImageSampleProjImplicitLod:
+ return EmitImageSampleImplicitLod(Proj, insn, state);
- case spv::OpImageSampleProjExplicitLod:
- return EmitImageSampleExplicitLod(Proj, insn, state);
+ case spv::OpImageSampleProjExplicitLod:
+ return EmitImageSampleExplicitLod(Proj, insn, state);
- case spv::OpImageSampleProjDrefImplicitLod:
- return EmitImageSampleImplicitLod(ProjDref, insn, state);
+ case spv::OpImageSampleProjDrefImplicitLod:
+ return EmitImageSampleImplicitLod(ProjDref, insn, state);
- case spv::OpImageSampleProjDrefExplicitLod:
- return EmitImageSampleExplicitLod(ProjDref, insn, state);
+ case spv::OpImageSampleProjDrefExplicitLod:
+ return EmitImageSampleExplicitLod(ProjDref, insn, state);
- case spv::OpImageGather:
- return EmitImageGather(None, insn, state);
+ case spv::OpImageGather:
+ return EmitImageGather(None, insn, state);
- case spv::OpImageDrefGather:
- return EmitImageGather(Dref, insn, state);
+ case spv::OpImageDrefGather:
+ return EmitImageGather(Dref, insn, state);
- case spv::OpImageFetch:
- return EmitImageFetch(insn, state);
+ case spv::OpImageFetch:
+ return EmitImageFetch(insn, state);
- case spv::OpImageQuerySizeLod:
- return EmitImageQuerySizeLod(insn, state);
+ case spv::OpImageQuerySizeLod:
+ return EmitImageQuerySizeLod(insn, state);
- case spv::OpImageQuerySize:
- return EmitImageQuerySize(insn, state);
+ case spv::OpImageQuerySize:
+ return EmitImageQuerySize(insn, state);
- case spv::OpImageQueryLod:
- return EmitImageQueryLod(insn, state);
+ case spv::OpImageQueryLod:
+ return EmitImageQueryLod(insn, state);
- case spv::OpImageQueryLevels:
- return EmitImageQueryLevels(insn, state);
+ case spv::OpImageQueryLevels:
+ return EmitImageQueryLevels(insn, state);
- case spv::OpImageQuerySamples:
- return EmitImageQuerySamples(insn, state);
+ case spv::OpImageQuerySamples:
+ return EmitImageQuerySamples(insn, state);
- case spv::OpImageRead:
- return EmitImageRead(insn, state);
+ case spv::OpImageRead:
+ return EmitImageRead(insn, state);
- case spv::OpImageWrite:
- return EmitImageWrite(insn, state);
+ case spv::OpImageWrite:
+ return EmitImageWrite(insn, state);
- case spv::OpImageTexelPointer:
- return EmitImageTexelPointer(insn, state);
+ case spv::OpImageTexelPointer:
+ return EmitImageTexelPointer(insn, state);
- case spv::OpSampledImage:
- case spv::OpImage:
- return EmitSampledImageCombineOrSplit(insn, state);
+ case spv::OpSampledImage:
+ case spv::OpImage:
+ return EmitSampledImageCombineOrSplit(insn, state);
- case spv::OpCopyObject:
- return EmitCopyObject(insn, state);
+ case spv::OpCopyObject:
+ return EmitCopyObject(insn, state);
- case spv::OpCopyMemory:
- return EmitCopyMemory(insn, state);
+ case spv::OpCopyMemory:
+ return EmitCopyMemory(insn, state);
- case spv::OpControlBarrier:
- return EmitControlBarrier(insn, state);
+ case spv::OpControlBarrier:
+ return EmitControlBarrier(insn, state);
- case spv::OpMemoryBarrier:
- return EmitMemoryBarrier(insn, state);
+ case spv::OpMemoryBarrier:
+ return EmitMemoryBarrier(insn, state);
- case spv::OpGroupNonUniformElect:
- case spv::OpGroupNonUniformAll:
- case spv::OpGroupNonUniformAny:
- case spv::OpGroupNonUniformAllEqual:
- case spv::OpGroupNonUniformBroadcast:
- case spv::OpGroupNonUniformBroadcastFirst:
- case spv::OpGroupNonUniformBallot:
- case spv::OpGroupNonUniformInverseBallot:
- case spv::OpGroupNonUniformBallotBitExtract:
- case spv::OpGroupNonUniformBallotBitCount:
- case spv::OpGroupNonUniformBallotFindLSB:
- case spv::OpGroupNonUniformBallotFindMSB:
- case spv::OpGroupNonUniformShuffle:
- case spv::OpGroupNonUniformShuffleXor:
- case spv::OpGroupNonUniformShuffleUp:
- case spv::OpGroupNonUniformShuffleDown:
- case spv::OpGroupNonUniformIAdd:
- case spv::OpGroupNonUniformFAdd:
- case spv::OpGroupNonUniformIMul:
- case spv::OpGroupNonUniformFMul:
- case spv::OpGroupNonUniformSMin:
- case spv::OpGroupNonUniformUMin:
- case spv::OpGroupNonUniformFMin:
- case spv::OpGroupNonUniformSMax:
- case spv::OpGroupNonUniformUMax:
- case spv::OpGroupNonUniformFMax:
- case spv::OpGroupNonUniformBitwiseAnd:
- case spv::OpGroupNonUniformBitwiseOr:
- case spv::OpGroupNonUniformBitwiseXor:
- case spv::OpGroupNonUniformLogicalAnd:
- case spv::OpGroupNonUniformLogicalOr:
- case spv::OpGroupNonUniformLogicalXor:
- return EmitGroupNonUniform(insn, state);
+ case spv::OpGroupNonUniformElect:
+ case spv::OpGroupNonUniformAll:
+ case spv::OpGroupNonUniformAny:
+ case spv::OpGroupNonUniformAllEqual:
+ case spv::OpGroupNonUniformBroadcast:
+ case spv::OpGroupNonUniformBroadcastFirst:
+ case spv::OpGroupNonUniformBallot:
+ case spv::OpGroupNonUniformInverseBallot:
+ case spv::OpGroupNonUniformBallotBitExtract:
+ case spv::OpGroupNonUniformBallotBitCount:
+ case spv::OpGroupNonUniformBallotFindLSB:
+ case spv::OpGroupNonUniformBallotFindMSB:
+ case spv::OpGroupNonUniformShuffle:
+ case spv::OpGroupNonUniformShuffleXor:
+ case spv::OpGroupNonUniformShuffleUp:
+ case spv::OpGroupNonUniformShuffleDown:
+ case spv::OpGroupNonUniformIAdd:
+ case spv::OpGroupNonUniformFAdd:
+ case spv::OpGroupNonUniformIMul:
+ case spv::OpGroupNonUniformFMul:
+ case spv::OpGroupNonUniformSMin:
+ case spv::OpGroupNonUniformUMin:
+ case spv::OpGroupNonUniformFMin:
+ case spv::OpGroupNonUniformSMax:
+ case spv::OpGroupNonUniformUMax:
+ case spv::OpGroupNonUniformFMax:
+ case spv::OpGroupNonUniformBitwiseAnd:
+ case spv::OpGroupNonUniformBitwiseOr:
+ case spv::OpGroupNonUniformBitwiseXor:
+ case spv::OpGroupNonUniformLogicalAnd:
+ case spv::OpGroupNonUniformLogicalOr:
+ case spv::OpGroupNonUniformLogicalXor:
+ return EmitGroupNonUniform(insn, state);
- case spv::OpArrayLength:
- return EmitArrayLength(insn, state);
+ case spv::OpArrayLength:
+ return EmitArrayLength(insn, state);
- default:
- UNREACHABLE("%s", OpcodeName(opcode).c_str());
- break;
+ default:
+ UNREACHABLE("%s", OpcodeName(opcode).c_str());
+ break;
}
return EmitResult::Continue;
@@ -1948,8 +1953,8 @@
for(auto i = 0u; i < insn.wordCount() - 3; i++)
{
Object::ID srcObjectId = insn.word(3u + i);
- auto & srcObject = getObject(srcObjectId);
- auto & srcObjectTy = getType(srcObject.type);
+ auto &srcObject = getObject(srcObjectId);
+ auto &srcObjectTy = getType(srcObject.type);
GenericValue srcObjectAccess(this, state, srcObjectId);
for(auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
@@ -2092,7 +2097,7 @@
for(auto i = 0u; i < type.sizeInComponents; i++)
{
auto sel = cond.Int(condIsScalar ? 0 : i);
- dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse()
+ dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse()
}
return EmitResult::Continue;
@@ -2160,41 +2165,41 @@
UInt v;
switch(insn.opcode())
{
- case spv::OpAtomicIAdd:
- case spv::OpAtomicIIncrement:
- v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicISub:
- case spv::OpAtomicIDecrement:
- v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicAnd:
- v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicOr:
- v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicXor:
- v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicSMin:
- v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
- break;
- case spv::OpAtomicSMax:
- v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
- break;
- case spv::OpAtomicUMin:
- v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicUMax:
- v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- case spv::OpAtomicExchange:
- v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
- break;
- default:
- UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
- break;
+ case spv::OpAtomicIAdd:
+ case spv::OpAtomicIIncrement:
+ v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicISub:
+ case spv::OpAtomicIDecrement:
+ v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicAnd:
+ v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicOr:
+ v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicXor:
+ v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicSMin:
+ v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
+ break;
+ case spv::OpAtomicSMax:
+ v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
+ break;
+ case spv::OpAtomicUMin:
+ v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicUMax:
+ v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ case spv::OpAtomicExchange:
+ v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
+ break;
+ default:
+ UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+ break;
}
x = Insert(x, v, j);
}
@@ -2298,25 +2303,25 @@
{
switch(insn.opcode())
{
- case spv::OpVariable:
- {
- Object::ID resultId = insn.word(2);
- auto &object = getObject(resultId);
- auto &objectTy = getType(object.type);
- if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
+ case spv::OpVariable:
{
- auto &dst = routine->getVariable(resultId);
- int offset = 0;
- VisitInterface(resultId,
- [&](Decorations const &d, AttribType type) {
- auto scalarSlot = d.Location << 2 | d.Component;
- routine->outputs[scalarSlot] = dst[offset++];
- });
+ Object::ID resultId = insn.word(2);
+ auto &object = getObject(resultId);
+ auto &objectTy = getType(object.type);
+ if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
+ {
+ auto &dst = routine->getVariable(resultId);
+ int offset = 0;
+ VisitInterface(resultId,
+ [&](Decorations const &d, AttribType type) {
+ auto scalarSlot = d.Location << 2 | d.Component;
+ routine->outputs[scalarSlot] = dst[offset++];
+ });
+ }
+ break;
}
- break;
- }
- default:
- break;
+ default:
+ break;
}
}
@@ -2331,47 +2336,46 @@
{
switch(model)
{
- case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT;
- // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
- // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
- // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
- case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
- case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
- // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan.
- // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV;
- // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV;
- // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV;
- // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
- // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
- // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
- // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV;
- // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV;
- default:
- UNSUPPORTED("ExecutionModel: %d", int(model));
- return VkShaderStageFlagBits(0);
+ case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT;
+ // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+ // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+ // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT;
+ case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
+ case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
+ // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan.
+ // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV;
+ // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV;
+ // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV;
+ // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
+ // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
+ // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
+ // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV;
+ // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV;
+ default:
+ UNSUPPORTED("ExecutionModel: %d", int(model));
+ return VkShaderStageFlagBits(0);
}
}
-SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId) :
- obj(shader->getObject(objId)),
- intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr),
- type(obj.type) {}
+SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId)
+ : obj(shader->getObject(objId))
+ , intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr)
+ , type(obj.type)
+{}
-SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
- pipelineLayout(pipelineLayout)
+SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout)
+ : pipelineLayout(pipelineLayout)
{
}
void SpirvRoutine::setImmutableInputBuiltins(SpirvShader const *shader)
{
- setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
});
- setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 4);
value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2379,8 +2383,7 @@
value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
});
- setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 4);
value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2388,8 +2391,7 @@
value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
});
- setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 4);
value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2397,8 +2399,7 @@
value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
});
- setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 4);
value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2406,8 +2407,7 @@
value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
});
- setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 4);
value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
@@ -2415,8 +2415,7 @@
value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
});
- setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
// Only a single physical device is supported.
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index e3d3af7..ecc3f5b 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -15,15 +15,15 @@
#ifndef sw_SpirvShader_hpp
#define sw_SpirvShader_hpp
-#include "ShaderCore.hpp"
#include "SamplerCore.hpp"
+#include "ShaderCore.hpp"
#include "SpirvID.hpp"
-#include "System/Types.hpp"
-#include "Vulkan/VkDebug.hpp"
-#include "Vulkan/VkConfig.h"
-#include "Vulkan/VkDescriptorSet.hpp"
#include "Device/Config.hpp"
#include "Device/Sampler.hpp"
+#include "System/Types.hpp"
+#include "Vulkan/VkConfig.h"
+#include "Vulkan/VkDebug.hpp"
+#include "Vulkan/VkDescriptorSet.hpp"
#include <spirv/unified1/spirv.hpp>
@@ -31,16 +31,16 @@
#include <atomic>
#include <cstdint>
#include <cstring>
+#include <deque>
#include <functional>
#include <memory>
-#include <deque>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
-#undef Yield // b/127920555
+#undef Yield // b/127920555
namespace vk {
@@ -65,8 +65,11 @@
class Intermediate
{
public:
- Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size) {
- memset(scalar, 0, sizeof(rr::Value*) * size);
+ Intermediate(uint32_t size)
+ : scalar(new rr::Value *[size])
+ , size(size)
+ {
+ memset(scalar, 0, sizeof(rr::Value *) * size);
}
~Intermediate()
@@ -75,12 +78,12 @@
}
void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); }
- void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); }
- void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); }
+ void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); }
+ void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); }
void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); }
- void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); }
- void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); }
+ void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); }
+ void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); }
// Value retrieval functions.
RValue<SIMD::Float> Float(uint32_t i) const
@@ -107,8 +110,8 @@
// No copy/move construction or assignment
Intermediate(Intermediate const &) = delete;
Intermediate(Intermediate &&) = delete;
- Intermediate & operator=(Intermediate const &) = delete;
- Intermediate & operator=(Intermediate &&) = delete;
+ Intermediate &operator=(Intermediate const &) = delete;
+ Intermediate &operator=(Intermediate &&) = delete;
private:
void emplace(uint32_t i, rr::Value *value)
@@ -128,7 +131,7 @@
using InsnStore = std::vector<uint32_t>;
InsnStore insns;
- using ImageSampler = void(void* texture, void *sampler, void* uvsIn, void* texelOut, void* constants);
+ using ImageSampler = void(void *texture, void *sampler, void *uvsIn, void *texelOut, void *constants);
enum class YieldResult
{
@@ -157,15 +160,15 @@
return iter[n];
}
- uint32_t const * wordPointer(uint32_t n) const
+ uint32_t const *wordPointer(uint32_t n) const
{
ASSERT(n < wordCount());
return &iter[n];
}
- const char* string(uint32_t n) const
+ const char *string(uint32_t n) const
{
- return reinterpret_cast<const char*>(wordPointer(n));
+ return reinterpret_cast<const char *>(wordPointer(n));
}
bool operator==(InsnIterator const &other) const
@@ -191,7 +194,7 @@
InsnIterator const operator++(int)
{
- InsnIterator ret{*this};
+ InsnIterator ret{ *this };
iter += wordCount();
return ret;
}
@@ -200,7 +203,8 @@
InsnIterator() = default;
- explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter}
+ explicit InsnIterator(InsnStore::const_iterator iter)
+ : iter{ iter }
{
}
};
@@ -208,12 +212,12 @@
/* range-based-for interface */
InsnIterator begin() const
{
- return InsnIterator{insns.cbegin() + 5};
+ return InsnIterator{ insns.cbegin() + 5 };
}
InsnIterator end() const
{
- return InsnIterator{insns.cend()};
+ return InsnIterator{ insns.cend() };
}
class Type
@@ -287,11 +291,11 @@
ID from;
ID to;
- bool operator == (const Edge& other) const { return from == other.from && to == other.to; }
+ bool operator==(const Edge &other) const { return from == other.from && to == other.to; }
struct Hash
{
- std::size_t operator()(const Edge& edge) const noexcept
+ std::size_t operator()(const Edge &edge) const noexcept
{
return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
}
@@ -299,7 +303,7 @@
};
Block() = default;
- Block(const Block& other) = default;
+ Block(const Block &other) = default;
explicit Block(InsnIterator begin, InsnIterator end);
/* range-based-for interface */
@@ -308,22 +312,23 @@
enum Kind
{
- Simple, // OpBranch or other simple terminator.
- StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional
- UnstructuredBranchConditional, // OpBranchConditional
- StructuredSwitch, // OpSelectionMerge + OpSwitch
- UnstructuredSwitch, // OpSwitch
- Loop, // OpLoopMerge + [OpBranchConditional | OpBranch]
+ Simple, // OpBranch or other simple terminator.
+ StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional
+ UnstructuredBranchConditional, // OpBranchConditional
+ StructuredSwitch, // OpSelectionMerge + OpSwitch
+ UnstructuredSwitch, // OpSwitch
+ Loop, // OpLoopMerge + [OpBranchConditional | OpBranch]
};
Kind kind = Simple;
- InsnIterator mergeInstruction; // Structured control flow merge instruction.
- InsnIterator branchInstruction; // Branch instruction.
- ID mergeBlock; // Structured flow merge block.
- ID continueTarget; // Loop continue block.
- Set ins; // Blocks that branch into this block.
- Set outs; // Blocks that this block branches to.
+ InsnIterator mergeInstruction; // Structured control flow merge instruction.
+ InsnIterator branchInstruction; // Branch instruction.
+ ID mergeBlock; // Structured flow merge block.
+ ID continueTarget; // Loop continue block.
+ Set ins; // Blocks that branch into this block.
+ Set outs; // Blocks that this block branches to.
bool isLoopMerge = false;
+
private:
InsnIterator begin_;
InsnIterator end_;
@@ -336,7 +341,7 @@
// Walks all reachable the blocks starting from id adding them to
// reachable.
- void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const;
+ void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;
// AssignBlockFields() performs the following for all reachable blocks:
// * Assigns Block::ins with the identifiers of all blocks that contain
@@ -362,13 +367,14 @@
return it->second;
}
- Block::ID entry; // function entry point block.
- HandleMap<Block> blocks; // blocks belonging to this function.
- Type::ID type; // type of the function.
- Type::ID result; // return type.
+ Block::ID entry; // function entry point block.
+ HandleMap<Block> blocks; // blocks belonging to this function.
+ Type::ID type; // type of the function.
+ Type::ID result; // return type.
};
- struct TypeOrObject {}; // Dummy struct to represent a Type or Object.
+ struct TypeOrObject
+ {}; // Dummy struct to represent a Type or Object.
// TypeOrObjectID is an identifier that represents a Type or an Object,
// and supports implicit casting to and from Type::ID or Object::ID.
@@ -377,9 +383,15 @@
public:
using Hash = std::hash<SpirvID<TypeOrObject>>;
- inline TypeOrObjectID(uint32_t id) : SpirvID(id) {}
- inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {}
- inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {}
+ inline TypeOrObjectID(uint32_t id)
+ : SpirvID(id)
+ {}
+ inline TypeOrObjectID(Type::ID id)
+ : SpirvID(id.value())
+ {}
+ inline TypeOrObjectID(Object::ID id)
+ : SpirvID(id.value())
+ {}
inline operator Type::ID() const { return Type::ID(value()); }
inline operator Object::ID() const { return Object::ID(value()); }
};
@@ -399,14 +411,16 @@
struct ImageInstruction
{
ImageInstruction(Variant variant, SamplerMethod samplerMethod)
- : parameters(0)
+ : parameters(0)
{
this->variant = variant;
this->samplerMethod = samplerMethod;
}
// Unmarshal from raw 32-bit data
- ImageInstruction(uint32_t parameters) : parameters(parameters) {}
+ ImageInstruction(uint32_t parameters)
+ : parameters(parameters)
+ {}
SamplerFunction getSamplerFunction() const
{
@@ -432,12 +446,12 @@
uint32_t gatherComponent : 2;
// Parameters are passed to the sampling routine in this order:
- uint32_t coordinates : 3; // 1-4 (does not contain projection component)
- // uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref
- // uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch
- uint32_t grad : 2; // 0-3 components (for each of dx / dy)
- uint32_t offset : 2; // 0-3 components
- uint32_t sample : 1; // 0-1 scalar integer
+ uint32_t coordinates : 3; // 1-4 (does not contain projection component)
+ // uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref
+ // uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch
+ uint32_t grad : 2; // 0-3 components (for each of dx / dy)
+ uint32_t offset : 2; // 0-3 components
+ uint32_t sample : 1; // 0-1 scalar integer
};
uint32_t parameters;
@@ -450,7 +464,7 @@
// shader entry point represented by this object.
uint64_t getSerialID() const
{
- return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
+ return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
}
SpirvShader(uint32_t codeSerialID,
@@ -577,7 +591,7 @@
bool HasOffset : 1;
bool HasArrayStride : 1;
bool HasMatrixStride : 1;
- bool HasRowMajor : 1; // whether RowMajor bit is valid.
+ bool HasRowMajor : 1; // whether RowMajor bit is valid.
bool Flat : 1;
bool Centroid : 1;
@@ -585,21 +599,31 @@
bool Block : 1;
bool BufferBlock : 1;
bool RelaxedPrecision : 1;
- bool RowMajor : 1; // RowMajor if true; ColMajor if false
- bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix.
+ bool RowMajor : 1; // RowMajor if true; ColMajor if false
+ bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix.
Decorations()
- : Location{-1}, Component{0},
- BuiltIn{static_cast<spv::BuiltIn>(-1)},
- Offset{-1}, ArrayStride{-1}, MatrixStride{-1},
- HasLocation{false}, HasComponent{false},
- HasBuiltIn{false}, HasOffset{false},
- HasArrayStride{false}, HasMatrixStride{false},
- HasRowMajor{false},
- Flat{false}, Centroid{false}, NoPerspective{false},
- Block{false}, BufferBlock{false},
- RelaxedPrecision{false}, RowMajor{false},
- InsideMatrix{false}
+ : Location{ -1 }
+ , Component{ 0 }
+ , BuiltIn{ static_cast<spv::BuiltIn>(-1) }
+ , Offset{ -1 }
+ , ArrayStride{ -1 }
+ , MatrixStride{ -1 }
+ , HasLocation{ false }
+ , HasComponent{ false }
+ , HasBuiltIn{ false }
+ , HasOffset{ false }
+ , HasArrayStride{ false }
+ , HasMatrixStride{ false }
+ , HasRowMajor{ false }
+ , Flat{ false }
+ , Centroid{ false }
+ , NoPerspective{ false }
+ , Block{ false }
+ , BufferBlock{ false }
+ , RelaxedPrecision{ false }
+ , RowMajor{ false }
+ , InsideMatrix{ false }
{
}
@@ -642,7 +666,8 @@
};
InterfaceComponent()
- : Type{ATTRIBTYPE_UNUSED}, DecorationBits{0}
+ : Type{ ATTRIBTYPE_UNUSED }
+ , DecorationBits{ 0 }
{
}
};
@@ -673,9 +698,10 @@
}
// returns the total allocated size in bytes.
inline uint32_t size() const { return totalSize; }
+
private:
- uint32_t totalSize = 0; // in bytes
- std::unordered_map<Object::ID, uint32_t> offsets; // in bytes
+ uint32_t totalSize = 0; // in bytes
+ std::unordered_map<Object::ID, uint32_t> offsets; // in bytes
};
std::vector<InterfaceComponent> inputs;
@@ -700,7 +726,7 @@
Function::ID entryPoint;
const bool robustBufferAccess = true;
- spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.
+ spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.
// DeclareType creates a Type for the given OpTypeX instruction, storing
// it into the types map. It is called from the analysis pass (constructor).
@@ -768,30 +794,30 @@
using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;
- void VisitInterface(Object::ID id, const InterfaceVisitor& v) const;
+ void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;
- int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor& v) const;
+ int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;
// MemoryElement describes a scalar element within a structure, and is
// used by the callback function of VisitMemoryObject().
struct MemoryElement
{
- uint32_t index; // index of the scalar element
- uint32_t offset; // offset (in bytes) from the base of the object
- const Type& type; // element type
+ uint32_t index; // index of the scalar element
+ uint32_t offset; // offset (in bytes) from the base of the object
+ const Type &type; // element type
};
- using MemoryVisitor = std::function<void(const MemoryElement&)>;
+ using MemoryVisitor = std::function<void(const MemoryElement &)>;
// VisitMemoryObject() walks a type tree in an explicitly laid out
// storage class, calling the MemoryVisitor for each scalar element
// within the
- void VisitMemoryObject(Object::ID id, const MemoryVisitor& v) const;
+ void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const;
// VisitMemoryObjectInner() is internally called by VisitMemoryObject()
- void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor& v) const;
+ void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const;
- Object& CreateConstant(InsnIterator it);
+ Object &CreateConstant(InsnIterator it);
void ProcessInterfaceVariable(Object &object);
@@ -800,19 +826,19 @@
{
public:
EmitState(SpirvRoutine *routine,
- Function::ID function,
- RValue<SIMD::Int> activeLaneMask,
- RValue<SIMD::Int> storesAndAtomicsMask,
- const vk::DescriptorSet::Bindings &descriptorSets,
- bool robustBufferAccess,
- spv::ExecutionModel executionModel)
- : routine(routine),
- function(function),
- activeLaneMaskValue(activeLaneMask.value),
- storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
- descriptorSets(descriptorSets),
- robustBufferAccess(robustBufferAccess),
- executionModel(executionModel)
+ Function::ID function,
+ RValue<SIMD::Int> activeLaneMask,
+ RValue<SIMD::Int> storesAndAtomicsMask,
+ const vk::DescriptorSet::Bindings &descriptorSets,
+ bool robustBufferAccess,
+ spv::ExecutionModel executionModel)
+ : routine(routine)
+ , function(function)
+ , activeLaneMaskValue(activeLaneMask.value)
+ , storesAndAtomicsMaskValue(storesAndAtomicsMask.value)
+ , descriptorSets(descriptorSets)
+ , robustBufferAccess(robustBufferAccess)
+ , executionModel(executionModel)
{
ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting.
}
@@ -845,12 +871,12 @@
// they will be ORed together.
void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);
- SpirvRoutine *routine = nullptr; // The current routine being built.
- Function::ID function; // The current function being built.
- Block::ID block; // The current block being built.
- rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
- rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask.
- Block::Set visited; // Blocks already built.
+ SpirvRoutine *routine = nullptr; // The current routine being built.
+ Function::ID function; // The current function being built.
+ Block::ID block; // The current block being built.
+ rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
+ rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask.
+ Block::Set visited; // Blocks already built.
std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
std::deque<Block::ID> *pending;
@@ -858,16 +884,16 @@
OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
- Intermediate& createIntermediate(Object::ID id, uint32_t size)
+ Intermediate &createIntermediate(Object::ID id, uint32_t size)
{
auto it = intermediates.emplace(std::piecewise_construct,
- std::forward_as_tuple(id),
- std::forward_as_tuple(size));
+ std::forward_as_tuple(id),
+ std::forward_as_tuple(size));
ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
return it.first->second;
}
- Intermediate const& getIntermediate(Object::ID id) const
+ Intermediate const &getIntermediate(Object::ID id) const
{
auto it = intermediates.find(id);
ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
@@ -880,7 +906,7 @@
ASSERT_MSG(added, "Pointer %d created twice", id.value());
}
- SIMD::Pointer const& getPointer(Object::ID id) const
+ SIMD::Pointer const &getPointer(Object::ID id) const
{
auto it = pointers.find(id);
ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
@@ -898,8 +924,8 @@
// EmitResult is an enumerator of result values from the Emit functions.
enum class EmitResult
{
- Continue, // No termination instructions.
- Terminator, // Reached a termination instruction.
+ Continue, // No termination instructions.
+ Terminator, // Reached a termination instruction.
};
// Generic wrapper over either per-lane intermediate value, or a constant.
@@ -1058,7 +1084,7 @@
EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;
void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
- SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
+ SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
uint32_t GetConstScalarInt(Object::ID id) const;
void EvalSpecConstantOp(InsnIterator insn);
void EvalSpecConstantUnaryOp(InsnIterator insn);
@@ -1071,7 +1097,7 @@
// StorePhi updates the phi's alloca storage value using the incoming
// values from blocks that are both in the OpPhi instruction and in
// filter.
- void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const;
+ void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;
// Emits a rr::Fence for the given MemorySemanticsMask.
void Fence(spv::MemorySemanticsMask semantics) const;
@@ -1085,7 +1111,7 @@
static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);
// Helper as we often need to take dot products as part of doing other things.
- SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
+ SIMD::Float Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const;
// Splits x into a floating-point significand in the range [0.5, 1.0)
// and an integral exponent of two, such that:
@@ -1121,21 +1147,21 @@
Pointer<Byte> function;
};
- vk::PipelineLayout const * const pipelineLayout;
+ vk::PipelineLayout const *const pipelineLayout;
std::unordered_map<SpirvShader::Object::ID, Variable> variables;
std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
- Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
- Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
+ Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS };
+ Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS };
Pointer<Byte> workgroupMemory;
Pointer<Pointer<Byte>> descriptorSets;
Pointer<Int> descriptorDynamicOffsets;
Pointer<Byte> pushConstants;
Pointer<Byte> constants;
- Int killMask = Int{0};
+ Int killMask = Int{ 0 };
SIMD::Int windowSpacePosition[2];
- Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
+ Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
void createVariable(SpirvShader::Object::ID id, uint32_t size)
{
@@ -1143,7 +1169,7 @@
ASSERT_MSG(added, "Variable %d created twice", id.value());
}
- Variable& getVariable(SpirvShader::Object::ID id)
+ Variable &getVariable(SpirvShader::Object::ID id)
{
auto it = variables.find(id);
ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value());
@@ -1158,13 +1184,13 @@
// uses the input builtin, otherwise the call is a no-op.
// F is a function with the signature:
// void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- template <typename F>
- inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f)
+ template<typename F>
+ inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f)
{
auto it = shader->inputBuiltins.find(id);
if(it != shader->inputBuiltins.end())
{
- const auto& builtin = it->second;
+ const auto &builtin = it->second;
f(builtin, getVariable(builtin.Id));
}
}
@@ -1176,7 +1202,6 @@
friend class SpirvShader;
std::unordered_map<SpirvShader::Object::ID, Variable> phis;
-
};
} // namespace sw
diff --git a/src/Pipeline/SpirvShaderArithmetic.cpp b/src/Pipeline/SpirvShaderArithmetic.cpp
index 218df0e..f039124 100644
--- a/src/Pipeline/SpirvShaderArithmetic.cpp
+++ b/src/Pipeline/SpirvShaderArithmetic.cpp
@@ -163,153 +163,152 @@
{
switch(insn.opcode())
{
- case spv::OpNot:
- case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
- dst.move(i, ~src.UInt(i));
- break;
- case spv::OpBitFieldInsert:
- {
- auto insert = GenericValue(this, state, insn.word(4)).UInt(i);
- auto offset = GenericValue(this, state, insn.word(5)).UInt(0);
- auto count = GenericValue(this, state, insn.word(6)).UInt(0);
- auto one = SIMD::UInt(1);
- auto v = src.UInt(i);
- auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
- dst.move(i, (v & ~mask) | ((insert << offset) & mask));
- break;
- }
- case spv::OpBitFieldSExtract:
- case spv::OpBitFieldUExtract:
- {
- auto offset = GenericValue(this, state, insn.word(4)).UInt(0);
- auto count = GenericValue(this, state, insn.word(5)).UInt(0);
- auto one = SIMD::UInt(1);
- auto v = src.UInt(i);
- SIMD::UInt out = (v >> offset) & Bitmask32(count);
- if(insn.opcode() == spv::OpBitFieldSExtract)
+ case spv::OpNot:
+ case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
+ dst.move(i, ~src.UInt(i));
+ break;
+ case spv::OpBitFieldInsert:
{
- auto sign = out & NthBit32(count - one);
- auto sext = ~(sign - one);
- out |= sext;
+ auto insert = GenericValue(this, state, insn.word(4)).UInt(i);
+ auto offset = GenericValue(this, state, insn.word(5)).UInt(0);
+ auto count = GenericValue(this, state, insn.word(6)).UInt(0);
+ auto one = SIMD::UInt(1);
+ auto v = src.UInt(i);
+ auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
+ dst.move(i, (v & ~mask) | ((insert << offset) & mask));
+ break;
}
- dst.move(i, out);
- break;
- }
- case spv::OpBitReverse:
- {
- // TODO: Add an intrinsic to reactor. Even if there isn't a
- // single vector instruction, there may be target-dependent
- // ways to make this faster.
- // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
- SIMD::UInt v = src.UInt(i);
- v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
- v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
- v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
- v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
- v = (v >> 16) | (v << 16);
- dst.move(i, v);
- break;
- }
- case spv::OpBitCount:
- dst.move(i, CountBits(src.UInt(i)));
- break;
- case spv::OpSNegate:
- dst.move(i, -src.Int(i));
- break;
- case spv::OpFNegate:
- dst.move(i, -src.Float(i));
- break;
- case spv::OpConvertFToU:
- dst.move(i, SIMD::UInt(src.Float(i)));
- break;
- case spv::OpConvertFToS:
- dst.move(i, SIMD::Int(src.Float(i)));
- break;
- case spv::OpConvertSToF:
- dst.move(i, SIMD::Float(src.Int(i)));
- break;
- case spv::OpConvertUToF:
- dst.move(i, SIMD::Float(src.UInt(i)));
- break;
- case spv::OpBitcast:
- dst.move(i, src.Float(i));
- break;
- case spv::OpIsInf:
- dst.move(i, IsInf(src.Float(i)));
- break;
- case spv::OpIsNan:
- dst.move(i, IsNan(src.Float(i)));
- break;
- case spv::OpDPdx:
- case spv::OpDPdxCoarse:
- // Derivative instructions: FS invocations are laid out like so:
- // 0 1
- // 2 3
- static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
- dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
- break;
- case spv::OpDPdy:
- case spv::OpDPdyCoarse:
- dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
- break;
- case spv::OpFwidth:
- case spv::OpFwidthCoarse:
- dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
- + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
- break;
- case spv::OpDPdxFine:
- {
- auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
- auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
- SIMD::Float v = SIMD::Float(firstRow);
- v = Insert(v, secondRow, 2);
- v = Insert(v, secondRow, 3);
- dst.move(i, v);
- break;
- }
- case spv::OpDPdyFine:
- {
- auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
- auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
- SIMD::Float v = SIMD::Float(firstColumn);
- v = Insert(v, secondColumn, 1);
- v = Insert(v, secondColumn, 3);
- dst.move(i, v);
- break;
- }
- case spv::OpFwidthFine:
- {
- auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
- auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
- SIMD::Float dpdx = SIMD::Float(firstRow);
- dpdx = Insert(dpdx, secondRow, 2);
- dpdx = Insert(dpdx, secondRow, 3);
- auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
- auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
- SIMD::Float dpdy = SIMD::Float(firstColumn);
- dpdy = Insert(dpdy, secondColumn, 1);
- dpdy = Insert(dpdy, secondColumn, 3);
- dst.move(i, Abs(dpdx) + Abs(dpdy));
- break;
- }
- case spv::OpQuantizeToF16:
- {
- // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
- auto abs = Abs(src.Float(i));
- auto sign = src.Int(i) & SIMD::Int(0x80000000);
- auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
- auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
- auto isNaN = IsNan(abs);
- auto isInfOrNan = isInf | isNaN;
- SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
- v &= ~isZero | SIMD::Int(0x80000000);
- v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
- v |= isNaN & SIMD::Int(0x400000);
- dst.move(i, v);
- break;
- }
- default:
- UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+ case spv::OpBitFieldSExtract:
+ case spv::OpBitFieldUExtract:
+ {
+ auto offset = GenericValue(this, state, insn.word(4)).UInt(0);
+ auto count = GenericValue(this, state, insn.word(5)).UInt(0);
+ auto one = SIMD::UInt(1);
+ auto v = src.UInt(i);
+ SIMD::UInt out = (v >> offset) & Bitmask32(count);
+ if(insn.opcode() == spv::OpBitFieldSExtract)
+ {
+ auto sign = out & NthBit32(count - one);
+ auto sext = ~(sign - one);
+ out |= sext;
+ }
+ dst.move(i, out);
+ break;
+ }
+ case spv::OpBitReverse:
+ {
+ // TODO: Add an intrinsic to reactor. Even if there isn't a
+ // single vector instruction, there may be target-dependent
+ // ways to make this faster.
+ // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ SIMD::UInt v = src.UInt(i);
+ v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
+ v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
+ v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
+ v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
+ v = (v >> 16) | (v << 16);
+ dst.move(i, v);
+ break;
+ }
+ case spv::OpBitCount:
+ dst.move(i, CountBits(src.UInt(i)));
+ break;
+ case spv::OpSNegate:
+ dst.move(i, -src.Int(i));
+ break;
+ case spv::OpFNegate:
+ dst.move(i, -src.Float(i));
+ break;
+ case spv::OpConvertFToU:
+ dst.move(i, SIMD::UInt(src.Float(i)));
+ break;
+ case spv::OpConvertFToS:
+ dst.move(i, SIMD::Int(src.Float(i)));
+ break;
+ case spv::OpConvertSToF:
+ dst.move(i, SIMD::Float(src.Int(i)));
+ break;
+ case spv::OpConvertUToF:
+ dst.move(i, SIMD::Float(src.UInt(i)));
+ break;
+ case spv::OpBitcast:
+ dst.move(i, src.Float(i));
+ break;
+ case spv::OpIsInf:
+ dst.move(i, IsInf(src.Float(i)));
+ break;
+ case spv::OpIsNan:
+ dst.move(i, IsNan(src.Float(i)));
+ break;
+ case spv::OpDPdx:
+ case spv::OpDPdxCoarse:
+ // Derivative instructions: FS invocations are laid out like so:
+ // 0 1
+ // 2 3
+ static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
+ dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
+ break;
+ case spv::OpDPdy:
+ case spv::OpDPdyCoarse:
+ dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
+ break;
+ case spv::OpFwidth:
+ case spv::OpFwidthCoarse:
+ dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)) + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
+ break;
+ case spv::OpDPdxFine:
+ {
+ auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
+ auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
+ SIMD::Float v = SIMD::Float(firstRow);
+ v = Insert(v, secondRow, 2);
+ v = Insert(v, secondRow, 3);
+ dst.move(i, v);
+ break;
+ }
+ case spv::OpDPdyFine:
+ {
+ auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
+ auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
+ SIMD::Float v = SIMD::Float(firstColumn);
+ v = Insert(v, secondColumn, 1);
+ v = Insert(v, secondColumn, 3);
+ dst.move(i, v);
+ break;
+ }
+ case spv::OpFwidthFine:
+ {
+ auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
+ auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
+ SIMD::Float dpdx = SIMD::Float(firstRow);
+ dpdx = Insert(dpdx, secondRow, 2);
+ dpdx = Insert(dpdx, secondRow, 3);
+ auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
+ auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
+ SIMD::Float dpdy = SIMD::Float(firstColumn);
+ dpdy = Insert(dpdy, secondColumn, 1);
+ dpdy = Insert(dpdy, secondColumn, 3);
+ dst.move(i, Abs(dpdx) + Abs(dpdy));
+ break;
+ }
+ case spv::OpQuantizeToF16:
+ {
+ // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
+ auto abs = Abs(src.Float(i));
+ auto sign = src.Int(i) & SIMD::Int(0x80000000);
+ auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
+ auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
+ auto isNaN = IsNan(abs);
+ auto isInfOrNan = isInf | isNaN;
+ SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
+ v &= ~isZero | SIMD::Int(0x80000000);
+ v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
+ v |= isNaN & SIMD::Int(0x400000);
+ dst.move(i, v);
+ break;
+ }
+ default:
+ UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
}
}
@@ -328,191 +327,191 @@
{
switch(insn.opcode())
{
- case spv::OpIAdd:
- dst.move(i, lhs.Int(i) + rhs.Int(i));
- break;
- case spv::OpISub:
- dst.move(i, lhs.Int(i) - rhs.Int(i));
- break;
- case spv::OpIMul:
- dst.move(i, lhs.Int(i) * rhs.Int(i));
- break;
- case spv::OpSDiv:
- {
- SIMD::Int a = lhs.Int(i);
- SIMD::Int b = rhs.Int(i);
- b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
- a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
- dst.move(i, a / b);
- break;
- }
- case spv::OpUDiv:
- {
- auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
- dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
- break;
- }
- case spv::OpSRem:
- {
- SIMD::Int a = lhs.Int(i);
- SIMD::Int b = rhs.Int(i);
- b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
- a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
- dst.move(i, a % b);
- break;
- }
- case spv::OpSMod:
- {
- SIMD::Int a = lhs.Int(i);
- SIMD::Int b = rhs.Int(i);
- b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
- a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
- auto mod = a % b;
- // If a and b have opposite signs, the remainder operation takes
- // the sign from a but OpSMod is supposed to take the sign of b.
- // Adding b will ensure that the result has the correct sign and
- // that it is still congruent to a modulo b.
- //
- // See also http://mathforum.org/library/drmath/view/52343.html
- auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
- auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
- dst.move(i, As<SIMD::Float>(fixedMod));
- break;
- }
- case spv::OpUMod:
- {
- auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
- dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
- break;
- }
- case spv::OpIEqual:
- case spv::OpLogicalEqual:
- dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpINotEqual:
- case spv::OpLogicalNotEqual:
- dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpUGreaterThan:
- dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
- break;
- case spv::OpSGreaterThan:
- dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpUGreaterThanEqual:
- dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
- break;
- case spv::OpSGreaterThanEqual:
- dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpULessThan:
- dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
- break;
- case spv::OpSLessThan:
- dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpULessThanEqual:
- dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
- break;
- case spv::OpSLessThanEqual:
- dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpFAdd:
- dst.move(i, lhs.Float(i) + rhs.Float(i));
- break;
- case spv::OpFSub:
- dst.move(i, lhs.Float(i) - rhs.Float(i));
- break;
- case spv::OpFMul:
- dst.move(i, lhs.Float(i) * rhs.Float(i));
- break;
- case spv::OpFDiv:
- dst.move(i, lhs.Float(i) / rhs.Float(i));
- break;
- case spv::OpFMod:
- // TODO(b/126873455): inaccurate for values greater than 2^24
- dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
- break;
- case spv::OpFRem:
- dst.move(i, lhs.Float(i) % rhs.Float(i));
- break;
- case spv::OpFOrdEqual:
- dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFUnordEqual:
- dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFOrdNotEqual:
- dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFUnordNotEqual:
- dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFOrdLessThan:
- dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFUnordLessThan:
- dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFOrdGreaterThan:
- dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFUnordGreaterThan:
- dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFOrdLessThanEqual:
- dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFUnordLessThanEqual:
- dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFOrdGreaterThanEqual:
- dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpFUnordGreaterThanEqual:
- dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
- break;
- case spv::OpShiftRightLogical:
- dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
- break;
- case spv::OpShiftRightArithmetic:
- dst.move(i, lhs.Int(i) >> rhs.Int(i));
- break;
- case spv::OpShiftLeftLogical:
- dst.move(i, lhs.UInt(i) << rhs.UInt(i));
- break;
- case spv::OpBitwiseOr:
- case spv::OpLogicalOr:
- dst.move(i, lhs.UInt(i) | rhs.UInt(i));
- break;
- case spv::OpBitwiseXor:
- dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
- break;
- case spv::OpBitwiseAnd:
- case spv::OpLogicalAnd:
- dst.move(i, lhs.UInt(i) & rhs.UInt(i));
- break;
- case spv::OpSMulExtended:
- // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
- // In our flat view then, component i is the i'th component of the first member;
- // component i + N is the i'th component of the second member.
- dst.move(i, lhs.Int(i) * rhs.Int(i));
- dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
- break;
- case spv::OpUMulExtended:
- dst.move(i, lhs.UInt(i) * rhs.UInt(i));
- dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
- break;
- case spv::OpIAddCarry:
- dst.move(i, lhs.UInt(i) + rhs.UInt(i));
- dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
- break;
- case spv::OpISubBorrow:
- dst.move(i, lhs.UInt(i) - rhs.UInt(i));
- dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
- break;
- default:
- UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
+ case spv::OpIAdd:
+ dst.move(i, lhs.Int(i) + rhs.Int(i));
+ break;
+ case spv::OpISub:
+ dst.move(i, lhs.Int(i) - rhs.Int(i));
+ break;
+ case spv::OpIMul:
+ dst.move(i, lhs.Int(i) * rhs.Int(i));
+ break;
+ case spv::OpSDiv:
+ {
+ SIMD::Int a = lhs.Int(i);
+ SIMD::Int b = rhs.Int(i);
+ b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
+ a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
+ dst.move(i, a / b);
+ break;
+ }
+ case spv::OpUDiv:
+ {
+ auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
+ dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
+ break;
+ }
+ case spv::OpSRem:
+ {
+ SIMD::Int a = lhs.Int(i);
+ SIMD::Int b = rhs.Int(i);
+ b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
+ a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
+ dst.move(i, a % b);
+ break;
+ }
+ case spv::OpSMod:
+ {
+ SIMD::Int a = lhs.Int(i);
+ SIMD::Int b = rhs.Int(i);
+ b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
+ a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
+ auto mod = a % b;
+ // If a and b have opposite signs, the remainder operation takes
+ // the sign from a but OpSMod is supposed to take the sign of b.
+ // Adding b will ensure that the result has the correct sign and
+ // that it is still congruent to a modulo b.
+ //
+ // See also http://mathforum.org/library/drmath/view/52343.html
+ auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
+ auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
+ dst.move(i, As<SIMD::Float>(fixedMod));
+ break;
+ }
+ case spv::OpUMod:
+ {
+ auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
+ dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
+ break;
+ }
+ case spv::OpIEqual:
+ case spv::OpLogicalEqual:
+ dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpINotEqual:
+ case spv::OpLogicalNotEqual:
+ dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpUGreaterThan:
+ dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
+ break;
+ case spv::OpSGreaterThan:
+ dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpUGreaterThanEqual:
+ dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
+ break;
+ case spv::OpSGreaterThanEqual:
+ dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpULessThan:
+ dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
+ break;
+ case spv::OpSLessThan:
+ dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpULessThanEqual:
+ dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
+ break;
+ case spv::OpSLessThanEqual:
+ dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpFAdd:
+ dst.move(i, lhs.Float(i) + rhs.Float(i));
+ break;
+ case spv::OpFSub:
+ dst.move(i, lhs.Float(i) - rhs.Float(i));
+ break;
+ case spv::OpFMul:
+ dst.move(i, lhs.Float(i) * rhs.Float(i));
+ break;
+ case spv::OpFDiv:
+ dst.move(i, lhs.Float(i) / rhs.Float(i));
+ break;
+ case spv::OpFMod:
+ // TODO(b/126873455): inaccurate for values greater than 2^24
+ dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
+ break;
+ case spv::OpFRem:
+ dst.move(i, lhs.Float(i) % rhs.Float(i));
+ break;
+ case spv::OpFOrdEqual:
+ dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFUnordEqual:
+ dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFOrdNotEqual:
+ dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFUnordNotEqual:
+ dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFOrdLessThan:
+ dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFUnordLessThan:
+ dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFOrdGreaterThan:
+ dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFUnordGreaterThan:
+ dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFOrdLessThanEqual:
+ dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFUnordLessThanEqual:
+ dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFOrdGreaterThanEqual:
+ dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpFUnordGreaterThanEqual:
+ dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
+ break;
+ case spv::OpShiftRightLogical:
+ dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
+ break;
+ case spv::OpShiftRightArithmetic:
+ dst.move(i, lhs.Int(i) >> rhs.Int(i));
+ break;
+ case spv::OpShiftLeftLogical:
+ dst.move(i, lhs.UInt(i) << rhs.UInt(i));
+ break;
+ case spv::OpBitwiseOr:
+ case spv::OpLogicalOr:
+ dst.move(i, lhs.UInt(i) | rhs.UInt(i));
+ break;
+ case spv::OpBitwiseXor:
+ dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
+ break;
+ case spv::OpBitwiseAnd:
+ case spv::OpLogicalAnd:
+ dst.move(i, lhs.UInt(i) & rhs.UInt(i));
+ break;
+ case spv::OpSMulExtended:
+ // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
+ // In our flat view then, component i is the i'th component of the first member;
+ // component i + N is the i'th component of the second member.
+ dst.move(i, lhs.Int(i) * rhs.Int(i));
+ dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
+ break;
+ case spv::OpUMulExtended:
+ dst.move(i, lhs.UInt(i) * rhs.UInt(i));
+ dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
+ break;
+ case spv::OpIAddCarry:
+ dst.move(i, lhs.UInt(i) + rhs.UInt(i));
+ dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
+ break;
+ case spv::OpISubBorrow:
+ dst.move(i, lhs.UInt(i) - rhs.UInt(i));
+ dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
+ break;
+ default:
+ UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
}
}
@@ -532,7 +531,7 @@
return EmitResult::Continue;
}
-SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
+SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const
{
SIMD::Float d = x.Float(0) * y.Float(0);
diff --git a/src/Pipeline/SpirvShaderControlFlow.cpp b/src/Pipeline/SpirvShaderControlFlow.cpp
index e7b1fd7..5f7a1b1 100644
--- a/src/Pipeline/SpirvShaderControlFlow.cpp
+++ b/src/Pipeline/SpirvShaderControlFlow.cpp
@@ -14,7 +14,7 @@
#include "SpirvShader.hpp"
-#include "Reactor/Coroutine.hpp" // rr::Yield
+#include "Reactor/Coroutine.hpp" // rr::Yield
#include "ShaderCore.hpp"
@@ -24,7 +24,9 @@
namespace sw {
-SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
+SpirvShader::Block::Block(InsnIterator begin, InsnIterator end)
+ : begin_(begin)
+ , end_(end)
{
// Default to a Simple, this may change later.
kind = Block::Simple;
@@ -111,7 +113,7 @@
}
}
-void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
+void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set &reachable) const
{
if(reachable.count(id) == 0)
{
@@ -156,8 +158,8 @@
auto block = getBlock(blockId);
for(auto dep : block.ins)
{
- if(block.kind != Block::Loop || // if not a loop...
- !ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
+ if(block.kind != Block::Loop || // if not a loop...
+ !ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
{
f(dep);
}
@@ -196,7 +198,7 @@
void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
{
- auto edge = Block::Edge{from, to};
+ auto edge = Block::Edge{ from, to };
auto it = edgeActiveLaneMasks.find(edge);
if(it == edgeActiveLaneMasks.end())
{
@@ -212,7 +214,7 @@
RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
{
- auto edge = Block::Edge{from, to};
+ auto edge = Block::Edge{ from, to };
auto it = state->edgeActiveLaneMasks.find(edge);
ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
return it->second;
@@ -239,8 +241,7 @@
// Ensure all dependency blocks have been generated.
auto depsDone = true;
- function.ForeachBlockDependency(id, [&](Block::ID dep)
- {
+ function.ForeachBlockDependency(id, [&](Block::ID dep) {
if(state->visited.count(dep) == 0)
{
state->pending->push_front(dep);
@@ -287,7 +288,7 @@
if(!state->visited.emplace(blockId).second)
{
- return; // Already generated this block.
+ return; // Already generated this block.
}
if(blockId != function.entry)
@@ -323,7 +324,7 @@
if(!state->visited.emplace(blockId).second)
{
- return; // Already emitted this loop.
+ return; // Already emitted this loop.
}
// Gather all the blocks that make up the loop.
@@ -414,7 +415,7 @@
// Add active lanes to the merge lane mask.
for(auto in : function.getBlock(mergeBlockId).ins)
{
- auto edge = Block::Edge{in, mergeBlockId};
+ auto edge = Block::Edge{ in, mergeBlockId };
auto it = state->edgeActiveLaneMasks.find(edge);
if(it != state->edgeActiveLaneMasks.end())
{
@@ -563,9 +564,9 @@
SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
{
auto functionId = Function::ID(insn.word(3));
- const auto& functionIt = functions.find(functionId);
+ const auto &functionIt = functions.find(functionId);
ASSERT(functionIt != functions.end());
- auto& function = functionIt->second;
+ auto &function = functionIt->second;
// TODO(b/141246700): Add full support for spv::OpFunctionCall
// The only supported function is a single OpKill wrapped in a
@@ -608,15 +609,15 @@
switch(executionScope)
{
- case spv::ScopeWorkgroup:
- Yield(YieldResult::ControlBarrier);
- break;
- case spv::ScopeSubgroup:
- break;
- default:
- // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
- UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
- break;
+ case spv::ScopeWorkgroup:
+ Yield(YieldResult::ControlBarrier);
+ break;
+ case spv::ScopeSubgroup:
+ break;
+ default:
+ // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
+ UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
+ break;
}
return EmitResult::Continue;
@@ -654,7 +655,7 @@
}
}
-void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
+void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const
{
auto typeId = Type::ID(insn.word(1));
auto type = getType(typeId);
@@ -688,7 +689,7 @@
{
if(semantics == spv::MemorySemanticsMaskNone)
{
- return; //no-op
+ return; //no-op
}
rr::Fence(MemoryOrder(semantics));
}
diff --git a/src/Pipeline/SpirvShaderEnumNames.cpp b/src/Pipeline/SpirvShaderEnumNames.cpp
index 34cadd4..39a0bf0 100644
--- a/src/Pipeline/SpirvShaderEnumNames.cpp
+++ b/src/Pipeline/SpirvShaderEnumNames.cpp
@@ -14,15 +14,15 @@
// This file contains code used to aid debugging.
-#include <spirv/unified1/spirv.h>
#include "SpirvShader.hpp"
+#include <spirv/unified1/spirv.h>
// Prototypes for SPIRV-Tools functions that do not have public headers.
// This is a C++ function, so the name is mangled, and signature changes will
// result in a linker error instead of runtime signature mismatches.
// Gets the name of an instruction, without the "Op" prefix.
-extern const char* spvOpcodeString(const SpvOp opcode);
+extern const char *spvOpcodeString(const SpvOp opcode);
namespace sw {
@@ -31,4 +31,4 @@
return spvOpcodeString(static_cast<SpvOp>(op));
}
-} // namespace sw
+} // namespace sw
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index 50ae6a5..f6aaeca 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -16,12 +16,11 @@
#include "ShaderCore.hpp"
-#include <spirv/unified1/spirv.hpp>
#include <spirv/unified1/GLSL.std.450.h>
+#include <spirv/unified1/spirv.hpp>
-namespace
-{
- constexpr float PI = 3.141592653589793f;
+namespace {
+constexpr float PI = 3.141592653589793f;
}
namespace sw {
@@ -34,915 +33,925 @@
switch(extInstIndex)
{
- case GLSLstd450FAbs:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
+ case GLSLstd450FAbs:
{
- dst.move(i, Abs(src.Float(i)));
- }
- break;
- }
- case GLSLstd450SAbs:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Abs(src.Int(i)));
- }
- break;
- }
- case GLSLstd450Cross:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
- dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
- dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
- break;
- }
- case GLSLstd450Floor:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Floor(src.Float(i)));
- }
- break;
- }
- case GLSLstd450Trunc:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Trunc(src.Float(i)));
- }
- break;
- }
- case GLSLstd450Ceil:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Ceil(src.Float(i)));
- }
- break;
- }
- case GLSLstd450Fract:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Frac(src.Float(i)));
- }
- break;
- }
- case GLSLstd450Round:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Round(src.Float(i)));
- }
- break;
- }
- case GLSLstd450RoundEven:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto x = Round(src.Float(i));
- // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
- dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
- SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
- }
- break;
- }
- case GLSLstd450FMin:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
- }
- break;
- }
- case GLSLstd450FMax:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
- }
- break;
- }
- case GLSLstd450SMin:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
- }
- break;
- }
- case GLSLstd450SMax:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
- }
- break;
- }
- case GLSLstd450UMin:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
- }
- break;
- }
- case GLSLstd450UMax:
- {
- auto lhs = GenericValue(this, state, insn.word(5));
- auto rhs = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
- }
- break;
- }
- case GLSLstd450Step:
- {
- auto edge = GenericValue(this, state, insn.word(5));
- auto x = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
- }
- break;
- }
- case GLSLstd450SmoothStep:
- {
- auto edge0 = GenericValue(this, state, insn.word(5));
- auto edge1 = GenericValue(this, state, insn.word(6));
- auto x = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
- (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
- dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
- }
- break;
- }
- case GLSLstd450FMix:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto y = GenericValue(this, state, insn.word(6));
- auto a = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
- }
- break;
- }
- case GLSLstd450FClamp:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto minVal = GenericValue(this, state, insn.word(6));
- auto maxVal = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
- }
- break;
- }
- case GLSLstd450SClamp:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto minVal = GenericValue(this, state, insn.word(6));
- auto maxVal = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
- }
- break;
- }
- case GLSLstd450UClamp:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto minVal = GenericValue(this, state, insn.word(6));
- auto maxVal = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
- }
- break;
- }
- case GLSLstd450FSign:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
- auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
- dst.move(i, neg | pos);
- }
- break;
- }
- case GLSLstd450SSign:
- {
- auto src = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
- auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
- dst.move(i, neg | pos);
- }
- break;
- }
- case GLSLstd450Reflect:
- {
- auto I = GenericValue(this, state, insn.word(5));
- auto N = GenericValue(this, state, insn.word(6));
-
- SIMD::Float d = Dot(type.sizeInComponents, I, N);
-
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
- }
- break;
- }
- case GLSLstd450Refract:
- {
- auto I = GenericValue(this, state, insn.word(5));
- auto N = GenericValue(this, state, insn.word(6));
- auto eta = GenericValue(this, state, insn.word(7));
-
- SIMD::Float d = Dot(type.sizeInComponents, I, N);
- SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
- SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
- SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
-
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
- }
- break;
- }
- case GLSLstd450FaceForward:
- {
- auto N = GenericValue(this, state, insn.word(5));
- auto I = GenericValue(this, state, insn.word(6));
- auto Nref = GenericValue(this, state, insn.word(7));
-
- SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
- SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
-
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto n = N.Float(i);
- dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
- }
- break;
- }
- case GLSLstd450Length:
- {
- auto x = GenericValue(this, state, insn.word(5));
- SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
-
- dst.move(0, Sqrt(d));
- break;
- }
- case GLSLstd450Normalize:
- {
- auto x = GenericValue(this, state, insn.word(5));
- SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
- SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
-
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, invLength * x.Float(i));
- }
- break;
- }
- case GLSLstd450Distance:
- {
- auto p0 = GenericValue(this, state, insn.word(5));
- auto p1 = GenericValue(this, state, insn.word(6));
- auto p0Type = getType(p0.type);
-
- // sqrt(dot(p0-p1, p0-p1))
- SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
-
- for(auto i = 1u; i < p0Type.sizeInComponents; i++)
- {
- d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
- }
-
- dst.move(0, Sqrt(d));
- break;
- }
- case GLSLstd450Modf:
- {
- auto val = GenericValue(this, state, insn.word(5));
- auto ptrId = Object::ID(insn.word(6));
- auto ptrTy = getType(getObject(ptrId).type);
- auto ptr = GetPointerToData(ptrId, 0, state);
- bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
- // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
- // to be in bounds even for inactive lanes.
- // - Clarify the SPIR-V spec.
- // - Eliminate lane masking and assume interleaving.
- auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
-
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- SIMD::Float whole, frac;
- std::tie(whole, frac) = Modf(val.Float(i));
- dst.move(i, frac);
- auto p = ptr + (i * sizeof(float));
- if(interleavedByLane) { p = InterleaveByLane(p); }
- p.Store(whole, robustness, state->activeLaneMask());
- }
- break;
- }
- case GLSLstd450ModfStruct:
- {
- auto val = GenericValue(this, state, insn.word(5));
- auto valTy = getType(val.type);
-
- for(auto i = 0u; i < valTy.sizeInComponents; i++)
- {
- SIMD::Float whole, frac;
- std::tie(whole, frac) = Modf(val.Float(i));
- dst.move(i, frac);
- dst.move(i + valTy.sizeInComponents, whole);
- }
- break;
- }
- case GLSLstd450PackSnorm4x8:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) |
- ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) << 8) |
- ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) << 16) |
- ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) << 24));
- break;
- }
- case GLSLstd450PackUnorm4x8:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
- ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
- ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
- ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
- break;
- }
- case GLSLstd450PackSnorm2x16:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
- SIMD::Int(0xFFFF)) |
- ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
- SIMD::Int(0xFFFF)) << 16));
- break;
- }
- case GLSLstd450PackUnorm2x16:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
- SIMD::UInt(0xFFFF)) |
- ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
- SIMD::UInt(0xFFFF)) << 16));
- break;
- }
- case GLSLstd450PackHalf2x16:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
- break;
- }
- case GLSLstd450UnpackSnorm4x8:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- break;
- }
- case GLSLstd450UnpackUnorm4x8:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
- break;
- }
- case GLSLstd450UnpackSnorm2x16:
- {
- auto val = GenericValue(this, state, insn.word(5));
- // clamp(f / 32767.0, -1.0, 1.0)
- dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
- SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
- SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- break;
- }
- case GLSLstd450UnpackUnorm2x16:
- {
- auto val = GenericValue(this, state, insn.word(5));
- // f / 65535.0
- dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
- dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
- break;
- }
- case GLSLstd450UnpackHalf2x16:
- {
- auto val = GenericValue(this, state, insn.word(5));
- dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
- dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
- break;
- }
- case GLSLstd450Fma:
- {
- auto a = GenericValue(this, state, insn.word(5));
- auto b = GenericValue(this, state, insn.word(6));
- auto c = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
- }
- break;
- }
- case GLSLstd450Frexp:
- {
- auto val = GenericValue(this, state, insn.word(5));
- auto ptrId = Object::ID(insn.word(6));
- auto ptrTy = getType(getObject(ptrId).type);
- auto ptr = GetPointerToData(ptrId, 0, state);
- bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
- // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
- // to be in bounds even for inactive lanes.
- // - Clarify the SPIR-V spec.
- // - Eliminate lane masking and assume interleaving.
- auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
-
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- SIMD::Float significand;
- SIMD::Int exponent;
- std::tie(significand, exponent) = Frexp(val.Float(i));
-
- dst.move(i, significand);
-
- auto p = ptr + (i * sizeof(float));
- if(interleavedByLane) { p = InterleaveByLane(p); }
- p.Store(exponent, robustness, state->activeLaneMask());
- }
- break;
- }
- case GLSLstd450FrexpStruct:
- {
- auto val = GenericValue(this, state, insn.word(5));
- auto numComponents = getType(val.type).sizeInComponents;
- for(auto i = 0u; i < numComponents; i++)
- {
- auto significandAndExponent = Frexp(val.Float(i));
- dst.move(i, significandAndExponent.first);
- dst.move(i + numComponents, significandAndExponent.second);
- }
- break;
- }
- case GLSLstd450Ldexp:
- {
- auto significand = GenericValue(this, state, insn.word(5));
- auto exponent = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- // Assumes IEEE 754
- auto in = significand.Float(i);
- auto significandExponent = Exponent(in);
- auto combinedExponent = exponent.Int(i) + significandExponent;
- auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
- auto isSignificandInf = SIMD::UInt(IsInf(in));
- auto isSignificandNaN = SIMD::UInt(IsNan(in));
- auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
- auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
- auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
-
- SIMD::UInt v;
- v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
- v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
- v &= isExponentInBounds; // Clear v if the exponent is OOB.
-
- v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
- v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
-
- // If the input significand is zero, inf or nan, just return the
- // input significand.
- auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
- v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
-
- dst.move(i, As<SIMD::Float>(v));
- }
- break;
- }
- case GLSLstd450Radians:
- {
- auto degrees = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
- }
- break;
- }
- case GLSLstd450Degrees:
- {
- auto radians = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
- }
- break;
- }
- case GLSLstd450Sin:
- {
- auto radians = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Sin(radians.Float(i)));
- }
- break;
- }
- case GLSLstd450Cos:
- {
- auto radians = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Cos(radians.Float(i)));
- }
- break;
- }
- case GLSLstd450Tan:
- {
- auto radians = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Tan(radians.Float(i)));
- }
- break;
- }
- case GLSLstd450Asin:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Asin(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Acos:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Acos(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Atan:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Atan(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Sinh:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Sinh(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Cosh:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Cosh(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Tanh:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Tanh(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Asinh:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Asinh(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Acosh:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Acosh(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Atanh:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Atanh(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Atan2:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto y = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Atan2(x.Float(i), y.Float(i)));
- }
- break;
- }
- case GLSLstd450Pow:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto y = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Pow(x.Float(i), y.Float(i)));
- }
- break;
- }
- case GLSLstd450Exp:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Exp(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Log:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Log(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Exp2:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Exp2(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Log2:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Log2(val.Float(i)));
- }
- break;
- }
- case GLSLstd450Sqrt:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, Sqrt(val.Float(i)));
- }
- break;
- }
- case GLSLstd450InverseSqrt:
- {
- auto val = GenericValue(this, state, insn.word(5));
- Decorations d;
- ApplyDecorationsForId(&d, insn.word(5));
- if(d.RelaxedPrecision)
- {
+ auto src = GenericValue(this, state, insn.word(5));
for(auto i = 0u; i < type.sizeInComponents; i++)
{
- dst.move(i, RcpSqrt_pp(val.Float(i)));
+ dst.move(i, Abs(src.Float(i)));
}
+ break;
}
- else
+ case GLSLstd450SAbs:
{
+ auto src = GenericValue(this, state, insn.word(5));
for(auto i = 0u; i < type.sizeInComponents; i++)
{
- dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
- }
- }
- break;
- }
- case GLSLstd450Determinant:
- {
- auto mat = GenericValue(this, state, insn.word(5));
- auto numComponents = getType(mat.type).sizeInComponents;
- switch(numComponents)
- {
- case 4: // 2x2
- dst.move(0, Determinant(
- mat.Float(0), mat.Float(1),
- mat.Float(2), mat.Float(3)));
- break;
- case 9: // 3x3
- dst.move(0, Determinant(
- mat.Float(0), mat.Float(1), mat.Float(2),
- mat.Float(3), mat.Float(4), mat.Float(5),
- mat.Float(6), mat.Float(7), mat.Float(8)));
- break;
- case 16: // 4x4
- dst.move(0, Determinant(
- mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
- mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
- mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
- mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
- break;
- default:
- UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
- }
- break;
- }
- case GLSLstd450MatrixInverse:
- {
- auto mat = GenericValue(this, state, insn.word(5));
- auto numComponents = getType(mat.type).sizeInComponents;
- switch(numComponents)
- {
- case 4: // 2x2
- {
- auto inv = MatrixInverse(
- mat.Float(0), mat.Float(1),
- mat.Float(2), mat.Float(3));
- for(uint32_t i = 0; i < inv.size(); i++)
- {
- dst.move(i, inv[i]);
+ dst.move(i, Abs(src.Int(i)));
}
break;
}
- case 9: // 3x3
+ case GLSLstd450Cross:
{
- auto inv = MatrixInverse(
- mat.Float(0), mat.Float(1), mat.Float(2),
- mat.Float(3), mat.Float(4), mat.Float(5),
- mat.Float(6), mat.Float(7), mat.Float(8));
- for(uint32_t i = 0; i < inv.size(); i++)
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
+ dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
+ dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
+ break;
+ }
+ case GLSLstd450Floor:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
{
- dst.move(i, inv[i]);
+ dst.move(i, Floor(src.Float(i)));
}
break;
}
- case 16: // 4x4
+ case GLSLstd450Trunc:
{
- auto inv = MatrixInverse(
- mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
- mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
- mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
- mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
- for(uint32_t i = 0; i < inv.size(); i++)
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
{
- dst.move(i, inv[i]);
+ dst.move(i, Trunc(src.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Ceil:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Ceil(src.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Fract:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Frac(src.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Round:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Round(src.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450RoundEven:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto x = Round(src.Float(i));
+ // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
+ dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
+ SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
+ }
+ break;
+ }
+ case GLSLstd450FMin:
+ {
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450FMax:
+ {
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450SMin:
+ {
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
+ }
+ break;
+ }
+ case GLSLstd450SMax:
+ {
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
+ }
+ break;
+ }
+ case GLSLstd450UMin:
+ {
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
+ }
+ break;
+ }
+ case GLSLstd450UMax:
+ {
+ auto lhs = GenericValue(this, state, insn.word(5));
+ auto rhs = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
+ }
+ break;
+ }
+ case GLSLstd450Step:
+ {
+ auto edge = GenericValue(this, state, insn.word(5));
+ auto x = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
+ }
+ break;
+ }
+ case GLSLstd450SmoothStep:
+ {
+ auto edge0 = GenericValue(this, state, insn.word(5));
+ auto edge1 = GenericValue(this, state, insn.word(6));
+ auto x = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
+ (edge1.Float(i) - edge0.Float(i)),
+ SIMD::Float(0.0f)),
+ SIMD::Float(1.0f));
+ dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
+ }
+ break;
+ }
+ case GLSLstd450FMix:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto y = GenericValue(this, state, insn.word(6));
+ auto a = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
+ }
+ break;
+ }
+ case GLSLstd450FClamp:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto minVal = GenericValue(this, state, insn.word(6));
+ auto maxVal = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450SClamp:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto minVal = GenericValue(this, state, insn.word(6));
+ auto maxVal = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
+ }
+ break;
+ }
+ case GLSLstd450UClamp:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto minVal = GenericValue(this, state, insn.word(6));
+ auto maxVal = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
+ }
+ break;
+ }
+ case GLSLstd450FSign:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
+ auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
+ dst.move(i, neg | pos);
+ }
+ break;
+ }
+ case GLSLstd450SSign:
+ {
+ auto src = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
+ auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
+ dst.move(i, neg | pos);
+ }
+ break;
+ }
+ case GLSLstd450Reflect:
+ {
+ auto I = GenericValue(this, state, insn.word(5));
+ auto N = GenericValue(this, state, insn.word(6));
+
+ SIMD::Float d = Dot(type.sizeInComponents, I, N);
+
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
+ }
+ break;
+ }
+ case GLSLstd450Refract:
+ {
+ auto I = GenericValue(this, state, insn.word(5));
+ auto N = GenericValue(this, state, insn.word(6));
+ auto eta = GenericValue(this, state, insn.word(7));
+
+ SIMD::Float d = Dot(type.sizeInComponents, I, N);
+ SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
+ SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
+ SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
+
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450FaceForward:
+ {
+ auto N = GenericValue(this, state, insn.word(5));
+ auto I = GenericValue(this, state, insn.word(6));
+ auto Nref = GenericValue(this, state, insn.word(7));
+
+ SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
+ SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
+
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto n = N.Float(i);
+ dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
+ }
+ break;
+ }
+ case GLSLstd450Length:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
+
+ dst.move(0, Sqrt(d));
+ break;
+ }
+ case GLSLstd450Normalize:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
+ SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
+
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, invLength * x.Float(i));
+ }
+ break;
+ }
+ case GLSLstd450Distance:
+ {
+ auto p0 = GenericValue(this, state, insn.word(5));
+ auto p1 = GenericValue(this, state, insn.word(6));
+ auto p0Type = getType(p0.type);
+
+ // sqrt(dot(p0-p1, p0-p1))
+ SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
+
+ for(auto i = 1u; i < p0Type.sizeInComponents; i++)
+ {
+ d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
+ }
+
+ dst.move(0, Sqrt(d));
+ break;
+ }
+ case GLSLstd450Modf:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ auto ptrId = Object::ID(insn.word(6));
+ auto ptrTy = getType(getObject(ptrId).type);
+ auto ptr = GetPointerToData(ptrId, 0, state);
+ bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+ // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
+ // to be in bounds even for inactive lanes.
+ // - Clarify the SPIR-V spec.
+ // - Eliminate lane masking and assume interleaving.
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
+
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ SIMD::Float whole, frac;
+ std::tie(whole, frac) = Modf(val.Float(i));
+ dst.move(i, frac);
+ auto p = ptr + (i * sizeof(float));
+ if(interleavedByLane) { p = InterleaveByLane(p); }
+ p.Store(whole, robustness, state->activeLaneMask());
+ }
+ break;
+ }
+ case GLSLstd450ModfStruct:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ auto valTy = getType(val.type);
+
+ for(auto i = 0u; i < valTy.sizeInComponents; i++)
+ {
+ SIMD::Float whole, frac;
+ std::tie(whole, frac) = Modf(val.Float(i));
+ dst.move(i, frac);
+ dst.move(i + valTy.sizeInComponents, whole);
+ }
+ break;
+ }
+ case GLSLstd450PackSnorm4x8:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF)) |
+ ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF))
+ << 8) |
+ ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF))
+ << 16) |
+ ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF))
+ << 24));
+ break;
+ }
+ case GLSLstd450PackUnorm4x8:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
+ break;
+ }
+ case GLSLstd450PackSnorm2x16:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
+ SIMD::Int(0xFFFF)) |
+ ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
+ SIMD::Int(0xFFFF))
+ << 16));
+ break;
+ }
+ case GLSLstd450PackUnorm2x16:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
+ SIMD::UInt(0xFFFF)) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
+ SIMD::UInt(0xFFFF))
+ << 16));
+ break;
+ }
+ case GLSLstd450PackHalf2x16:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
+ break;
+ }
+ case GLSLstd450UnpackSnorm4x8:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ break;
+ }
+ case GLSLstd450UnpackUnorm4x8:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ break;
+ }
+ case GLSLstd450UnpackSnorm2x16:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ // clamp(f / 32767.0, -1.0, 1.0)
+ dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
+ SIMD::Float(1.0f / float(0x7FFF0000)),
+ SIMD::Float(-1.0f)),
+ SIMD::Float(1.0f)));
+ dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
+ SIMD::Float(-1.0f)),
+ SIMD::Float(1.0f)));
+ break;
+ }
+ case GLSLstd450UnpackUnorm2x16:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ // f / 65535.0
+ dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
+ dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
+ break;
+ }
+ case GLSLstd450UnpackHalf2x16:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
+ dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
+ break;
+ }
+ case GLSLstd450Fma:
+ {
+ auto a = GenericValue(this, state, insn.word(5));
+ auto b = GenericValue(this, state, insn.word(6));
+ auto c = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Frexp:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ auto ptrId = Object::ID(insn.word(6));
+ auto ptrTy = getType(getObject(ptrId).type);
+ auto ptr = GetPointerToData(ptrId, 0, state);
+ bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+ // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
+ // to be in bounds even for inactive lanes.
+ // - Clarify the SPIR-V spec.
+ // - Eliminate lane masking and assume interleaving.
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
+
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ SIMD::Float significand;
+ SIMD::Int exponent;
+ std::tie(significand, exponent) = Frexp(val.Float(i));
+
+ dst.move(i, significand);
+
+ auto p = ptr + (i * sizeof(float));
+ if(interleavedByLane) { p = InterleaveByLane(p); }
+ p.Store(exponent, robustness, state->activeLaneMask());
+ }
+ break;
+ }
+ case GLSLstd450FrexpStruct:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ auto numComponents = getType(val.type).sizeInComponents;
+ for(auto i = 0u; i < numComponents; i++)
+ {
+ auto significandAndExponent = Frexp(val.Float(i));
+ dst.move(i, significandAndExponent.first);
+ dst.move(i + numComponents, significandAndExponent.second);
+ }
+ break;
+ }
+ case GLSLstd450Ldexp:
+ {
+ auto significand = GenericValue(this, state, insn.word(5));
+ auto exponent = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ // Assumes IEEE 754
+ auto in = significand.Float(i);
+ auto significandExponent = Exponent(in);
+ auto combinedExponent = exponent.Int(i) + significandExponent;
+ auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
+ auto isSignificandInf = SIMD::UInt(IsInf(in));
+ auto isSignificandNaN = SIMD::UInt(IsNan(in));
+ auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
+ auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
+ auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
+
+ SIMD::UInt v;
+ v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
+ v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
+ v &= isExponentInBounds; // Clear v if the exponent is OOB.
+
+ v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
+ v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
+
+ // If the input significand is zero, inf or nan, just return the
+ // input significand.
+ auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
+ v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
+
+ dst.move(i, As<SIMD::Float>(v));
+ }
+ break;
+ }
+ case GLSLstd450Radians:
+ {
+ auto degrees = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
+ }
+ break;
+ }
+ case GLSLstd450Degrees:
+ {
+ auto radians = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
+ }
+ break;
+ }
+ case GLSLstd450Sin:
+ {
+ auto radians = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Sin(radians.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Cos:
+ {
+ auto radians = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Cos(radians.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Tan:
+ {
+ auto radians = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Tan(radians.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Asin:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Asin(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Acos:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Acos(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Atan:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Atan(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Sinh:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Sinh(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Cosh:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Cosh(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Tanh:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Tanh(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Asinh:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Asinh(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Acosh:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Acosh(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Atanh:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Atanh(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Atan2:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto y = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Atan2(x.Float(i), y.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Pow:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto y = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Pow(x.Float(i), y.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Exp:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Exp(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Log:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Log(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Exp2:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Exp2(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Log2:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Log2(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450Sqrt:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, Sqrt(val.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450InverseSqrt:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ Decorations d;
+ ApplyDecorationsForId(&d, insn.word(5));
+ if(d.RelaxedPrecision)
+ {
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, RcpSqrt_pp(val.Float(i)));
+ }
+ }
+ else
+ {
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
+ }
+ }
+ break;
+ }
+ case GLSLstd450Determinant:
+ {
+ auto mat = GenericValue(this, state, insn.word(5));
+ auto numComponents = getType(mat.type).sizeInComponents;
+ switch(numComponents)
+ {
+ case 4: // 2x2
+ dst.move(0, Determinant(
+ mat.Float(0), mat.Float(1),
+ mat.Float(2), mat.Float(3)));
+ break;
+ case 9: // 3x3
+ dst.move(0, Determinant(
+ mat.Float(0), mat.Float(1), mat.Float(2),
+ mat.Float(3), mat.Float(4), mat.Float(5),
+ mat.Float(6), mat.Float(7), mat.Float(8)));
+ break;
+ case 16: // 4x4
+ dst.move(0, Determinant(
+ mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
+ mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
+ mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
+ mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
+ break;
+ default:
+ UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
+ }
+ break;
+ }
+ case GLSLstd450MatrixInverse:
+ {
+ auto mat = GenericValue(this, state, insn.word(5));
+ auto numComponents = getType(mat.type).sizeInComponents;
+ switch(numComponents)
+ {
+ case 4: // 2x2
+ {
+ auto inv = MatrixInverse(
+ mat.Float(0), mat.Float(1),
+ mat.Float(2), mat.Float(3));
+ for(uint32_t i = 0; i < inv.size(); i++)
+ {
+ dst.move(i, inv[i]);
+ }
+ break;
+ }
+ case 9: // 3x3
+ {
+ auto inv = MatrixInverse(
+ mat.Float(0), mat.Float(1), mat.Float(2),
+ mat.Float(3), mat.Float(4), mat.Float(5),
+ mat.Float(6), mat.Float(7), mat.Float(8));
+ for(uint32_t i = 0; i < inv.size(); i++)
+ {
+ dst.move(i, inv[i]);
+ }
+ break;
+ }
+ case 16: // 4x4
+ {
+ auto inv = MatrixInverse(
+ mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
+ mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
+ mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
+ mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
+ for(uint32_t i = 0; i < inv.size(); i++)
+ {
+ dst.move(i, inv[i]);
+ }
+ break;
+ }
+ default:
+ UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
+ }
+ break;
+ }
+ case GLSLstd450IMix:
+ {
+ UNREACHABLE("GLSLstd450IMix has been removed from the specification");
+ break;
+ }
+ case GLSLstd450PackDouble2x32:
+ {
+ UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
+ break;
+ }
+ case GLSLstd450UnpackDouble2x32:
+ {
+ UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
+ break;
+ }
+ case GLSLstd450FindILsb:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto v = val.UInt(i);
+ dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
+ }
+ break;
+ }
+ case GLSLstd450FindSMsb:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
+ dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
+ }
+ break;
+ }
+ case GLSLstd450FindUMsb:
+ {
+ auto val = GenericValue(this, state, insn.word(5));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
+ }
+ break;
+ }
+ case GLSLstd450InterpolateAtCentroid:
+ {
+ UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+ break;
+ }
+ case GLSLstd450InterpolateAtSample:
+ {
+ UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+ break;
+ }
+ case GLSLstd450InterpolateAtOffset:
+ {
+ UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+ break;
+ }
+ case GLSLstd450NMin:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto y = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, NMin(x.Float(i), y.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450NMax:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto y = GenericValue(this, state, insn.word(6));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, NMax(x.Float(i), y.Float(i)));
+ }
+ break;
+ }
+ case GLSLstd450NClamp:
+ {
+ auto x = GenericValue(this, state, insn.word(5));
+ auto minVal = GenericValue(this, state, insn.word(6));
+ auto maxVal = GenericValue(this, state, insn.word(7));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
+ dst.move(i, clamp);
}
break;
}
default:
- UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
- }
- break;
- }
- case GLSLstd450IMix:
- {
- UNREACHABLE("GLSLstd450IMix has been removed from the specification");
- break;
- }
- case GLSLstd450PackDouble2x32:
- {
- UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
- break;
- }
- case GLSLstd450UnpackDouble2x32:
- {
- UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
- break;
- }
- case GLSLstd450FindILsb:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto v = val.UInt(i);
- dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
- }
- break;
- }
- case GLSLstd450FindSMsb:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
- dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
- }
- break;
- }
- case GLSLstd450FindUMsb:
- {
- auto val = GenericValue(this, state, insn.word(5));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
- }
- break;
- }
- case GLSLstd450InterpolateAtCentroid:
- {
- UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
- break;
- }
- case GLSLstd450InterpolateAtSample:
- {
- UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
- break;
- }
- case GLSLstd450InterpolateAtOffset:
- {
- UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
- break;
- }
- case GLSLstd450NMin:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto y = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, NMin(x.Float(i), y.Float(i)));
- }
- break;
- }
- case GLSLstd450NMax:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto y = GenericValue(this, state, insn.word(6));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, NMax(x.Float(i), y.Float(i)));
- }
- break;
- }
- case GLSLstd450NClamp:
- {
- auto x = GenericValue(this, state, insn.word(5));
- auto minVal = GenericValue(this, state, insn.word(6));
- auto maxVal = GenericValue(this, state, insn.word(7));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
- dst.move(i, clamp);
- }
- break;
- }
- default:
- UNREACHABLE("ExtInst %d", int(extInstIndex));
- break;
+ UNREACHABLE("ExtInst %d", int(extInstIndex));
+ break;
}
return EmitResult::Continue;
diff --git a/src/Pipeline/SpirvShaderGroup.cpp b/src/Pipeline/SpirvShaderGroup.cpp
index cc46949..77b73ea 100644
--- a/src/Pipeline/SpirvShaderGroup.cpp
+++ b/src/Pipeline/SpirvShaderGroup.cpp
@@ -18,20 +18,21 @@
namespace sw {
-struct SpirvShader::GroupOps {
+struct SpirvShader::GroupOps
+{
// Template function to perform a binary operation.
// |TYPE| should be the type of the identity value (as an SIMD::<Type>).
// |APPLY| should be a callable object that takes two RValue<TYPE> parameters
// and returns a new RValue<TYPE> corresponding to the operation's result.
- template <typename TYPE, typename APPLY>
+ template<typename TYPE, typename APPLY>
static void BinaryOperation(
- const SpirvShader* shader,
- const SpirvShader::InsnIterator& insn,
- const SpirvShader::EmitState* state,
- Intermediate& dst,
- const TYPE& identity,
- APPLY&& apply)
+ const SpirvShader *shader,
+ const SpirvShader::InsnIterator &insn,
+ const SpirvShader::EmitState *state,
+ Intermediate &dst,
+ const TYPE &identity,
+ APPLY &&apply)
{
SpirvShader::GenericValue value(shader, state, insn.word(5));
auto &type = shader->getType(SpirvShader::Type::ID(insn.word(1)));
@@ -42,40 +43,38 @@
TYPE v = As<TYPE>(v_uint);
switch(spv::GroupOperation(insn.word(4)))
{
- case spv::GroupOperationReduce:
- {
- // NOTE: floating-point add and multiply are not really commutative so
- // ensure that all values in the final lanes are identical
- TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw]
- TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw]
- dst.move(i, v3);
- break;
- }
- case spv::GroupOperationInclusiveScan:
- {
- TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw]
- TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
- dst.move(i, v3);
- break;
- }
- case spv::GroupOperationExclusiveScan:
- {
- TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw]
- TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
- auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz]
- dst.move(i, v4);
- break;
- }
- default:
- UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d",
- SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4));
+ case spv::GroupOperationReduce:
+ {
+ // NOTE: floating-point add and multiply are not really associative so
+ // ensure that all values in the final lanes are identical
+ TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw]
+ TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw]
+ dst.move(i, v3);
+ break;
+ }
+ case spv::GroupOperationInclusiveScan:
+ {
+ TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */); // [x] [xy] [yz] [zw]
+ TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
+ dst.move(i, v3);
+ break;
+ }
+ case spv::GroupOperationExclusiveScan:
+ {
+ TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */); // [x] [xy] [yz] [zw]
+ TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw]
+ auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz]
+ dst.move(i, v4);
+ break;
+ }
+ default:
+ UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d",
+ SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4));
}
}
}
-
};
-
SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
{
static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4");
@@ -89,420 +88,404 @@
switch(insn.opcode())
{
- case spv::OpGroupNonUniformElect:
- {
- // Result is true only in the active invocation with the lowest id
- // in the group, otherwise result is false.
- SIMD::Int active = state->activeLaneMask();
- // TODO: Would be nice if we could write this as:
- // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
- auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
- auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
- dst.move(0, elect);
- break;
- }
-
- case spv::OpGroupNonUniformAll:
- {
- GenericValue predicate(this, state, insn.word(4));
- dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask())));
- break;
- }
-
- case spv::OpGroupNonUniformAny:
- {
- GenericValue predicate(this, state, insn.word(4));
- dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask())));
- break;
- }
-
- case spv::OpGroupNonUniformAllEqual:
- {
- GenericValue value(this, state, insn.word(4));
- auto res = SIMD::UInt(0xffffffff);
- SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
- SIMD::UInt inactive = ~active;
- for(auto i = 0u; i < type.sizeInComponents; i++)
+ case spv::OpGroupNonUniformElect:
{
- SIMD::UInt v = value.UInt(i) & active;
- SIMD::UInt filled = v;
- for(int j = 0; j < SIMD::Width - 1; j++)
+ // Result is true only in the active invocation with the lowest id
+ // in the group, otherwise result is false.
+ SIMD::Int active = state->activeLaneMask();
+ // TODO: Would be nice if we could write this as:
+ // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
+ auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
+ dst.move(0, elect);
+ break;
+ }
+
+ case spv::OpGroupNonUniformAll:
+ {
+ GenericValue predicate(this, state, insn.word(4));
+ dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask())));
+ break;
+ }
+
+ case spv::OpGroupNonUniformAny:
+ {
+ GenericValue predicate(this, state, insn.word(4));
+ dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask())));
+ break;
+ }
+
+ case spv::OpGroupNonUniformAllEqual:
+ {
+ GenericValue value(this, state, insn.word(4));
+ auto res = SIMD::UInt(0xffffffff);
+ SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
+ SIMD::UInt inactive = ~active;
+ for(auto i = 0u; i < type.sizeInComponents; i++)
{
- filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value
+ SIMD::UInt v = value.UInt(i) & active;
+ SIMD::UInt filled = v;
+ for(int j = 0; j < SIMD::Width - 1; j++)
+ {
+ filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value
+ }
+ res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
}
- res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
- }
- dst.move(0, res);
- break;
- }
-
- case spv::OpGroupNonUniformBroadcast:
- {
- auto valueId = Object::ID(insn.word(4));
- auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
- GenericValue value(this, state, valueId);
- auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, OrAll(value.Int(i) & mask));
- }
- break;
- }
-
- case spv::OpGroupNonUniformBroadcastFirst:
- {
- auto valueId = Object::ID(insn.word(4));
- GenericValue value(this, state, valueId);
- // Result is true only in the active invocation with the lowest id
- // in the group, otherwise result is false.
- SIMD::Int active = state->activeLaneMask();
- // TODO: Would be nice if we could write this as:
- // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
- auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
- auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- dst.move(i, OrAll(value.Int(i) & elect));
- }
- break;
- }
-
- case spv::OpGroupNonUniformBallot:
- {
- ASSERT(type.sizeInComponents == 4);
- GenericValue predicate(this, state, insn.word(4));
- dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
- dst.move(1, SIMD::Int(0));
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(0));
- break;
- }
-
- case spv::OpGroupNonUniformInverseBallot:
- {
- auto valueId = Object::ID(insn.word(4));
- ASSERT(type.sizeInComponents == 1);
- ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
- GenericValue value(this, state, valueId);
- auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
- dst.move(0, -bit);
- break;
- }
-
- case spv::OpGroupNonUniformBallotBitExtract:
- {
- auto valueId = Object::ID(insn.word(4));
- auto indexId = Object::ID(insn.word(5));
- ASSERT(type.sizeInComponents == 1);
- ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
- ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
- GenericValue value(this, state, valueId);
- GenericValue index(this, state, indexId);
- auto vecIdx = index.Int(0) / SIMD::Int(32);
- auto bitIdx = index.Int(0) & SIMD::Int(31);
- auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
- (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
- (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
- (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
- dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
- break;
- }
-
- case spv::OpGroupNonUniformBallotBitCount:
- {
- auto operation = spv::GroupOperation(insn.word(4));
- auto valueId = Object::ID(insn.word(5));
- ASSERT(type.sizeInComponents == 1);
- ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
- GenericValue value(this, state, valueId);
- switch(operation)
- {
- case spv::GroupOperationReduce:
- dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
+ dst.move(0, res);
break;
- case spv::GroupOperationInclusiveScan:
- dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
+ }
+
+ case spv::OpGroupNonUniformBroadcast:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
+ GenericValue value(this, state, valueId);
+ auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, OrAll(value.Int(i) & mask));
+ }
break;
- case spv::GroupOperationExclusiveScan:
- dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
+ }
+
+ case spv::OpGroupNonUniformBroadcastFirst:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ GenericValue value(this, state, valueId);
+ // Result is true only in the active invocation with the lowest id
+ // in the group, otherwise result is false.
+ SIMD::Int active = state->activeLaneMask();
+ // TODO: Would be nice if we could write this as:
+ // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
+ auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, OrAll(value.Int(i) & elect));
+ }
break;
+ }
+
+ case spv::OpGroupNonUniformBallot:
+ {
+ ASSERT(type.sizeInComponents == 4);
+ GenericValue predicate(this, state, insn.word(4));
+ dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
+ dst.move(1, SIMD::Int(0));
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(0));
+ break;
+ }
+
+ case spv::OpGroupNonUniformInverseBallot:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
+ dst.move(0, -bit);
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotBitExtract:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ auto indexId = Object::ID(insn.word(5));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
+ GenericValue value(this, state, valueId);
+ GenericValue index(this, state, indexId);
+ auto vecIdx = index.Int(0) / SIMD::Int(32);
+ auto bitIdx = index.Int(0) & SIMD::Int(31);
+ auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
+ (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
+ (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
+ (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
+ dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotBitCount:
+ {
+ auto operation = spv::GroupOperation(insn.word(4));
+ auto valueId = Object::ID(insn.word(5));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ switch(operation)
+ {
+ case spv::GroupOperationReduce:
+ dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
+ break;
+ case spv::GroupOperationInclusiveScan:
+ dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
+ break;
+ case spv::GroupOperationExclusiveScan:
+ dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
+ break;
+ default:
+ UNSUPPORTED("GroupOperation %d", int(operation));
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotFindLSB:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
+ break;
+ }
+
+ case spv::OpGroupNonUniformBallotFindMSB:
+ {
+ auto valueId = Object::ID(insn.word(4));
+ ASSERT(type.sizeInComponents == 1);
+ ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
+ GenericValue value(this, state, valueId);
+ dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
+ break;
+ }
+
+ case spv::OpGroupNonUniformShuffle:
+ {
+ GenericValue value(this, state, insn.word(4));
+ GenericValue id(this, state, insn.word(5));
+ auto x = CmpEQ(SIMD::Int(0), id.Int(0));
+ auto y = CmpEQ(SIMD::Int(1), id.Int(0));
+ auto z = CmpEQ(SIMD::Int(2), id.Int(0));
+ auto w = CmpEQ(SIMD::Int(3), id.Int(0));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ SIMD::Int v = value.Int(i);
+ dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformShuffleXor:
+ {
+ GenericValue value(this, state, insn.word(4));
+ GenericValue mask(this, state, insn.word(5));
+ auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+ auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+ auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+ auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ SIMD::Int v = value.Int(i);
+ dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformShuffleUp:
+ {
+ GenericValue value(this, state, insn.word(4));
+ GenericValue delta(this, state, insn.word(5));
+ auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
+ auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
+ auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
+ auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ SIMD::Int v = value.Int(i);
+ dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformShuffleDown:
+ {
+ GenericValue value(this, state, insn.word(4));
+ GenericValue delta(this, state, insn.word(5));
+ auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
+ auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
+ auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
+ auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
+ for(auto i = 0u; i < type.sizeInComponents; i++)
+ {
+ SIMD::Int v = value.Int(i);
+ dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
+ }
+ break;
+ }
+
+ case spv::OpGroupNonUniformIAdd:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type> a, RValue<Type> b) { return a + b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformFAdd:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0.),
+ [](RValue<Type> a, RValue<Type> b) { return a + b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformIMul:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(1),
+ [](RValue<Type> a, RValue<Type> b) { return a * b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformFMul:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(1.),
+ [](RValue<Type> a, RValue<Type> b) { return a * b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformBitwiseAnd:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(~0u),
+ [](RValue<Type> a, RValue<Type> b) { return a & b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformBitwiseOr:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type> a, RValue<Type> b) { return a | b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformBitwiseXor:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type> a, RValue<Type> b) { return a ^ b; });
+ break;
+ }
+
+ case spv::OpGroupNonUniformSMin:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(INT32_MAX),
+ [](RValue<Type> a, RValue<Type> b) { return Min(a, b); });
+ break;
+ }
+
+ case spv::OpGroupNonUniformUMin:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(~0u),
+ [](RValue<Type> a, RValue<Type> b) { return Min(a, b); });
+ break;
+ }
+
+ case spv::OpGroupNonUniformFMin:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type::positive_inf(),
+ [](RValue<Type> a, RValue<Type> b) { return NMin(a, b); });
+ break;
+ }
+
+ case spv::OpGroupNonUniformSMax:
+ {
+ using Type = SIMD::Int;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(INT32_MIN),
+ [](RValue<Type> a, RValue<Type> b) { return Max(a, b); });
+ break;
+ }
+
+ case spv::OpGroupNonUniformUMax:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type> a, RValue<Type> b) { return Max(a, b); });
+ break;
+ }
+
+ case spv::OpGroupNonUniformFMax:
+ {
+ using Type = SIMD::Float;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type::negative_inf(),
+ [](RValue<Type> a, RValue<Type> b) { return NMax(a, b); });
+ break;
+ }
+
+ case spv::OpGroupNonUniformLogicalAnd:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(~0u),
+ [](RValue<Type> a, RValue<Type> b) {
+ SIMD::UInt zero = SIMD::UInt(0);
+ return CmpNEQ(a, zero) & CmpNEQ(b, zero);
+ });
+ break;
+ }
+
+ case spv::OpGroupNonUniformLogicalOr:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type> a, RValue<Type> b) {
+ SIMD::UInt zero = SIMD::UInt(0);
+ return CmpNEQ(a, zero) | CmpNEQ(b, zero);
+ });
+ break;
+ }
+
+ case spv::OpGroupNonUniformLogicalXor:
+ {
+ using Type = SIMD::UInt;
+ SpirvShader::GroupOps::BinaryOperation(
+ this, insn, state, dst,
+ Type(0),
+ [](RValue<Type> a, RValue<Type> b) {
+ SIMD::UInt zero = SIMD::UInt(0);
+ return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
+ });
+ break;
+ }
+
default:
- UNSUPPORTED("GroupOperation %d", int(operation));
- }
- break;
- }
-
- case spv::OpGroupNonUniformBallotFindLSB:
- {
- auto valueId = Object::ID(insn.word(4));
- ASSERT(type.sizeInComponents == 1);
- ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
- GenericValue value(this, state, valueId);
- dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
- break;
- }
-
- case spv::OpGroupNonUniformBallotFindMSB:
- {
- auto valueId = Object::ID(insn.word(4));
- ASSERT(type.sizeInComponents == 1);
- ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
- GenericValue value(this, state, valueId);
- dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
- break;
- }
-
- case spv::OpGroupNonUniformShuffle:
- {
- GenericValue value(this, state, insn.word(4));
- GenericValue id(this, state, insn.word(5));
- auto x = CmpEQ(SIMD::Int(0), id.Int(0));
- auto y = CmpEQ(SIMD::Int(1), id.Int(0));
- auto z = CmpEQ(SIMD::Int(2), id.Int(0));
- auto w = CmpEQ(SIMD::Int(3), id.Int(0));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- SIMD::Int v = value.Int(i);
- dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
- }
- break;
- }
-
- case spv::OpGroupNonUniformShuffleXor:
- {
- GenericValue value(this, state, insn.word(4));
- GenericValue mask(this, state, insn.word(5));
- auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
- auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
- auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
- auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- SIMD::Int v = value.Int(i);
- dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
- }
- break;
- }
-
- case spv::OpGroupNonUniformShuffleUp:
- {
- GenericValue value(this, state, insn.word(4));
- GenericValue delta(this, state, insn.word(5));
- auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
- auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
- auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
- auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- SIMD::Int v = value.Int(i);
- dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
- }
- break;
- }
-
- case spv::OpGroupNonUniformShuffleDown:
- {
- GenericValue value(this, state, insn.word(4));
- GenericValue delta(this, state, insn.word(5));
- auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
- auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
- auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
- auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
- for(auto i = 0u; i < type.sizeInComponents; i++)
- {
- SIMD::Int v = value.Int(i);
- dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
- }
- break;
- }
-
- case spv::OpGroupNonUniformIAdd:
- {
- using Type = SIMD::Int;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0),
- [](RValue<Type>a, RValue<Type>b){ return a + b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformFAdd:
- {
- using Type = SIMD::Float;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0.),
- [](RValue<Type>a, RValue<Type>b){ return a + b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformIMul:
- {
- using Type = SIMD::Int;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(1),
- [](RValue<Type>a, RValue<Type>b){ return a * b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformFMul:
- {
- using Type = SIMD::Float;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(1.),
- [](RValue<Type>a, RValue<Type>b){ return a * b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformBitwiseAnd:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(~0u),
- [](RValue<Type>a, RValue<Type>b){ return a & b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformBitwiseOr:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0),
- [](RValue<Type>a, RValue<Type>b){ return a | b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformBitwiseXor:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0),
- [](RValue<Type>a, RValue<Type>b){ return a ^ b; }
- );
- break;
- }
-
- case spv::OpGroupNonUniformSMin:
- {
- using Type = SIMD::Int;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(INT32_MAX),
- [](RValue<Type>a, RValue<Type>b){ return Min(a, b); }
- );
- break;
- }
-
- case spv::OpGroupNonUniformUMin:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(~0u),
- [](RValue<Type>a, RValue<Type>b){ return Min(a, b); }
- );
- break;
- }
-
- case spv::OpGroupNonUniformFMin:
- {
- using Type = SIMD::Float;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type::positive_inf(),
- [](RValue<Type>a, RValue<Type>b){ return NMin(a, b); }
- );
- break;
- }
-
- case spv::OpGroupNonUniformSMax:
- {
- using Type = SIMD::Int;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(INT32_MIN),
- [](RValue<Type>a, RValue<Type>b){ return Max(a, b); }
- );
- break;
- }
-
- case spv::OpGroupNonUniformUMax:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0),
- [](RValue<Type>a, RValue<Type>b){ return Max(a, b); }
- );
- break;
- }
-
- case spv::OpGroupNonUniformFMax:
- {
- using Type = SIMD::Float;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type::negative_inf(),
- [](RValue<Type>a, RValue<Type>b){ return NMax(a, b); }
- );
- break;
- }
-
- case spv::OpGroupNonUniformLogicalAnd:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(~0u),
- [](RValue<Type>a, RValue<Type>b){
- SIMD::UInt zero = SIMD::UInt(0);
- return CmpNEQ(a, zero) & CmpNEQ(b, zero);
- }
- );
- break;
- }
-
- case spv::OpGroupNonUniformLogicalOr:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0),
- [](RValue<Type>a, RValue<Type>b){
- SIMD::UInt zero = SIMD::UInt(0);
- return CmpNEQ(a, zero) | CmpNEQ(b, zero);
- }
- );
- break;
- }
-
- case spv::OpGroupNonUniformLogicalXor:
- {
- using Type = SIMD::UInt;
- SpirvShader::GroupOps::BinaryOperation(
- this, insn, state, dst,
- Type(0),
- [](RValue<Type>a, RValue<Type>b){
- SIMD::UInt zero = SIMD::UInt(0);
- return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
- }
- );
- break;
- }
-
- default:
- UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
+ UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str());
}
return EmitResult::Continue;
}
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index 5e7edcf..1ec6f89 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -27,26 +27,26 @@
{
switch(format)
{
- case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
- case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
- case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
- case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
- case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
- case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
- case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
- case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
- case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
- case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
- case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
- case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
- case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
- case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
- case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
- case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
+ case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
+ case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
+ case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
+ case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
+ case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
+ case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
+ case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
+ case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
+ case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
+ case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
+ case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
+ case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
+ case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
+ case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
+ case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
+ case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
- default:
- UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
- return VK_FORMAT_UNDEFINED;
+ default:
+ UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
+ return VK_FORMAT_UNDEFINED;
}
}
@@ -57,21 +57,21 @@
sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
- return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse()
+ return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse()
}
-} // anonymous namespace
+} // anonymous namespace
namespace sw {
SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
{
- return EmitImageSample({variant, Implicit}, insn, state);
+ return EmitImageSample({ variant, Implicit }, insn, state);
}
SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
{
- ImageInstruction instruction = {variant, Gather};
+ ImageInstruction instruction = { variant, Gather };
instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;
return EmitImageSample(instruction, insn, state);
@@ -85,19 +85,20 @@
if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
{
- return EmitImageSample({variant, Lod}, insn, state);
+ return EmitImageSample({ variant, Lod }, insn, state);
}
else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
{
- return EmitImageSample({variant, Grad}, insn, state);
+ return EmitImageSample({ variant, Grad }, insn, state);
}
- else UNIMPLEMENTED("Image Operands %x", imageOperands);
+ else
+ UNIMPLEMENTED("Image Operands %x", imageOperands);
return EmitResult::Continue;
}
SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
{
- return EmitImageSample({None, Fetch}, insn, state);
+ return EmitImageSample({ None, Fetch }, insn, state);
}
SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
@@ -109,18 +110,17 @@
auto &resultType = getType(resultTypeId);
auto &result = state->createIntermediate(resultId, resultType.sizeInComponents);
- auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
+ auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
// If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
auto &sampledImage = getObject(sampledImageId);
- auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ?
- state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
+ auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ? state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
auto coordinate = GenericValue(this, state, coordinateId);
auto &coordinateType = getType(coordinate.type);
- Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
- Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
+ Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
+ Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
// Above we assumed that if the SampledImage operand is not the result of an OpSampledImage,
// it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch
@@ -206,7 +206,7 @@
instruction.coordinates = coordinates;
uint32_t i = 0;
- for( ; i < coordinates; i++)
+ for(; i < coordinates; i++)
{
if(instruction.isProj())
{
@@ -335,7 +335,7 @@
SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const
{
- return EmitImageSample({None, Query}, insn, state);
+ return EmitImageSample({ None, Query }, insn, state);
}
void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
@@ -359,23 +359,23 @@
switch(bindingLayout.descriptorType)
{
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- {
- extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
- arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
- break;
- }
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- {
- extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
- arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
- break;
- }
- default:
- UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ {
+ extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
+ arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ {
+ extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
+ arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
+ break;
+ }
+ default:
+ UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
}
auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
@@ -421,13 +421,13 @@
Int mipLevels = 0;
switch(bindingLayout.descriptorType)
{
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
- break;
- default:
- UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
+ break;
+ default:
+ UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
}
auto &dst = state->createIntermediate(resultId, 1);
@@ -455,16 +455,16 @@
Int sampleCount = 0;
switch(bindingLayout.descriptorType)
{
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
- break;
- default:
- UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
+ break;
+ default:
+ UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
}
auto &dst = state->createIntermediate(resultId, 1);
@@ -473,7 +473,7 @@
return EmitResult::Continue;
}
-SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
+SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
{
auto routine = state->routine;
bool isArrayed = imageType.definition.word(5) != 0;
@@ -495,16 +495,16 @@
}
auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
- ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
- : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
+ ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
+ : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
auto slicePitch = SIMD::Int(
- *Pointer<Int>(descriptor + (useStencilAspect
- ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
- : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
+ *Pointer<Int>(descriptor + (useStencilAspect
+ ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
+ : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
auto samplePitch = SIMD::Int(
- *Pointer<Int>(descriptor + (useStencilAspect
- ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
- : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
+ *Pointer<Int>(descriptor + (useStencilAspect
+ ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
+ : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
ptr += u * SIMD::Int(texelSize);
if(dims > 1)
@@ -568,13 +568,13 @@
// For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
// the renderpass data instead. In all other cases, we can use the format in the instruction.
auto vkFormat = (dim == spv::DimSubpassData)
- ? inputAttachmentFormats[d.InputAttachmentIndex]
- : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
+ ? inputAttachmentFormats[d.InputAttachmentIndex]
+ : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
// Depth+Stencil image attachments select aspect based on the Sampled Type of the
// OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
- getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
+ getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
if(useStencilAspect)
{
@@ -584,8 +584,8 @@
auto pointer = state->getPointer(imageId);
Pointer<Byte> binding = pointer.base;
Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
- ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
- : OFFSET(vk::StorageImageDescriptor, ptr)));
+ ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
+ : OFFSET(vk::StorageImageDescriptor, ptr)));
auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
@@ -604,7 +604,7 @@
// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
// TODO: specialize for small formats?
- for(auto i = 0; i < (texelSize + 3)/4; i++)
+ for(auto i = 0; i < (texelSize + 3) / 4; i++)
{
packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
texelPtr += sizeof(float);
@@ -615,214 +615,214 @@
// - Any format supported as a color or depth/stencil attachment, for input attachments
switch(vkFormat)
{
- case VK_FORMAT_R32G32B32A32_SFLOAT:
- case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_R32G32B32A32_UINT:
- dst.move(0, packed[0]);
- dst.move(1, packed[1]);
- dst.move(2, packed[2]);
- dst.move(3, packed[3]);
- break;
- case VK_FORMAT_R32_SINT:
- case VK_FORMAT_R32_UINT:
- dst.move(0, packed[0]);
- // Fill remaining channels with 0,0,1 (of the correct type)
- dst.move(1, SIMD::Int(0));
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(1));
- break;
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- dst.move(0, packed[0]);
- // Fill remaining channels with 0,0,1 (of the correct type)
- dst.move(1, SIMD::Float(0));
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_D16_UNORM:
- dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
- dst.move(1, SIMD::Float(0));
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_R16G16B16A16_SINT:
- dst.move(0, (packed[0] << 16) >> 16);
- dst.move(1, (packed[0]) >> 16);
- dst.move(2, (packed[1] << 16) >> 16);
- dst.move(3, (packed[1]) >> 16);
- break;
- case VK_FORMAT_R16G16B16A16_UINT:
- dst.move(0, packed[0] & SIMD::Int(0xffff));
- dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
- dst.move(2, packed[1] & SIMD::Int(0xffff));
- dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
- break;
- case VK_FORMAT_R16G16B16A16_SFLOAT:
- dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
- dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
- dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
- dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
- break;
- case VK_FORMAT_R8G8B8A8_SNORM:
- dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
- break;
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- break;
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
- dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
- dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
- dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- break;
- case VK_FORMAT_B8G8R8A8_UNORM:
- dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- break;
- case VK_FORMAT_B8G8R8A8_SRGB:
- dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
- dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
- dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
- dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- break;
- case VK_FORMAT_R8G8B8A8_UINT:
- case VK_FORMAT_A8B8G8R8_UINT_PACK32:
- dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
- dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
- dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF)));
- dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF)));
- break;
- case VK_FORMAT_R8G8B8A8_SINT:
- case VK_FORMAT_A8B8G8R8_SINT_PACK32:
- dst.move(0, (packed[0] << 24) >> 24);
- dst.move(1, (packed[0] << 16) >> 24);
- dst.move(2, (packed[0] << 8) >> 24);
- dst.move(3, (packed[0]) >> 24);
- break;
- case VK_FORMAT_R8_UNORM:
- dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(1, SIMD::Float(0));
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_R8_UINT:
- case VK_FORMAT_S8_UINT:
- dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
- dst.move(1, SIMD::UInt(0));
- dst.move(2, SIMD::UInt(0));
- dst.move(3, SIMD::UInt(1));
- break;
- case VK_FORMAT_R8_SINT:
- dst.move(0, (packed[0] << 24) >> 24);
- dst.move(1, SIMD::Int(0));
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(1));
- break;
- case VK_FORMAT_R8G8_UNORM:
- dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_R8G8_UINT:
- dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
- dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
- dst.move(2, SIMD::UInt(0));
- dst.move(3, SIMD::UInt(1));
- break;
- case VK_FORMAT_R8G8_SINT:
- dst.move(0, (packed[0] << 24) >> 24);
- dst.move(1, (packed[0] << 16) >> 24);
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(1));
- break;
- case VK_FORMAT_R16_SFLOAT:
- dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
- dst.move(1, SIMD::Float(0));
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_R16_UINT:
- dst.move(0, packed[0] & SIMD::Int(0xffff));
- dst.move(1, SIMD::UInt(0));
- dst.move(2, SIMD::UInt(0));
- dst.move(3, SIMD::UInt(1));
- break;
- case VK_FORMAT_R16_SINT:
- dst.move(0, (packed[0] << 16) >> 16);
- dst.move(1, SIMD::Int(0));
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(1));
- break;
- case VK_FORMAT_R16G16_SFLOAT:
- dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
- dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_R16G16_UINT:
- dst.move(0, packed[0] & SIMD::Int(0xffff));
- dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
- dst.move(2, SIMD::UInt(0));
- dst.move(3, SIMD::UInt(1));
- break;
- case VK_FORMAT_R16G16_SINT:
- dst.move(0, (packed[0] << 16) >> 16);
- dst.move(1, (packed[0]) >> 16);
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(1));
- break;
- case VK_FORMAT_R32G32_SINT:
- case VK_FORMAT_R32G32_UINT:
- dst.move(0, packed[0]);
- dst.move(1, packed[1]);
- dst.move(2, SIMD::Int(0));
- dst.move(3, SIMD::Int(1));
- break;
- case VK_FORMAT_R32G32_SFLOAT:
- dst.move(0, packed[0]);
- dst.move(1, packed[1]);
- dst.move(2, SIMD::Float(0));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_A2B10G10R10_UINT_PACK32:
- dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
- dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
- dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
- dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
- break;
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
- dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
- dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
- dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
- break;
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
- dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
- dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
- dst.move(3, SIMD::Float(1));
- break;
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
- dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
- dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
- dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
- break;
- default:
- UNIMPLEMENTED("VkFormat %d", int(vkFormat));
- break;
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ dst.move(0, packed[0]);
+ dst.move(1, packed[1]);
+ dst.move(2, packed[2]);
+ dst.move(3, packed[3]);
+ break;
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32_UINT:
+ dst.move(0, packed[0]);
+ // Fill remaining channels with 0,0,1 (of the correct type)
+ dst.move(1, SIMD::Int(0));
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(1));
+ break;
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ dst.move(0, packed[0]);
+ // Fill remaining channels with 0,0,1 (of the correct type)
+ dst.move(1, SIMD::Float(0));
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_D16_UNORM:
+ dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
+ dst.move(1, SIMD::Float(0));
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_R16G16B16A16_SINT:
+ dst.move(0, (packed[0] << 16) >> 16);
+ dst.move(1, (packed[0]) >> 16);
+ dst.move(2, (packed[1] << 16) >> 16);
+ dst.move(3, (packed[1]) >> 16);
+ break;
+ case VK_FORMAT_R16G16B16A16_UINT:
+ dst.move(0, packed[0] & SIMD::Int(0xffff));
+ dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
+ dst.move(2, packed[1] & SIMD::Int(0xffff));
+ dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
+ break;
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
+ dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
+ dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
+ dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
+ break;
+ case VK_FORMAT_R8G8B8A8_SNORM:
+ dst.move(0, Min(Max(SIMD::Float(((packed[0] << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(1, Min(Max(SIMD::Float(((packed[0] << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(2, Min(Max(SIMD::Float(((packed[0] << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ break;
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(2, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ break;
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+ dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+ dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+ dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ break;
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ dst.move(0, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ break;
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+ dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+ dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
+ dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ break;
+ case VK_FORMAT_R8G8B8A8_UINT:
+ case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+ dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
+ dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF)));
+ dst.move(2, ((As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF)));
+ dst.move(3, ((As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF)));
+ break;
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+ dst.move(0, (packed[0] << 24) >> 24);
+ dst.move(1, (packed[0] << 16) >> 24);
+ dst.move(2, (packed[0] << 8) >> 24);
+ dst.move(3, (packed[0]) >> 24);
+ break;
+ case VK_FORMAT_R8_UNORM:
+ dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(1, SIMD::Float(0));
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_R8_UINT:
+ case VK_FORMAT_S8_UINT:
+ dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
+ dst.move(1, SIMD::UInt(0));
+ dst.move(2, SIMD::UInt(0));
+ dst.move(3, SIMD::UInt(1));
+ break;
+ case VK_FORMAT_R8_SINT:
+ dst.move(0, (packed[0] << 24) >> 24);
+ dst.move(1, SIMD::Int(0));
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(1));
+ break;
+ case VK_FORMAT_R8G8_UNORM:
+ dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_R8G8_UINT:
+ dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
+ dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF)));
+ dst.move(2, SIMD::UInt(0));
+ dst.move(3, SIMD::UInt(1));
+ break;
+ case VK_FORMAT_R8G8_SINT:
+ dst.move(0, (packed[0] << 24) >> 24);
+ dst.move(1, (packed[0] << 16) >> 24);
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(1));
+ break;
+ case VK_FORMAT_R16_SFLOAT:
+ dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
+ dst.move(1, SIMD::Float(0));
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_R16_UINT:
+ dst.move(0, packed[0] & SIMD::Int(0xffff));
+ dst.move(1, SIMD::UInt(0));
+ dst.move(2, SIMD::UInt(0));
+ dst.move(3, SIMD::UInt(1));
+ break;
+ case VK_FORMAT_R16_SINT:
+ dst.move(0, (packed[0] << 16) >> 16);
+ dst.move(1, SIMD::Int(0));
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(1));
+ break;
+ case VK_FORMAT_R16G16_SFLOAT:
+ dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
+ dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_R16G16_UINT:
+ dst.move(0, packed[0] & SIMD::Int(0xffff));
+ dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
+ dst.move(2, SIMD::UInt(0));
+ dst.move(3, SIMD::UInt(1));
+ break;
+ case VK_FORMAT_R16G16_SINT:
+ dst.move(0, (packed[0] << 16) >> 16);
+ dst.move(1, (packed[0]) >> 16);
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(1));
+ break;
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32_UINT:
+ dst.move(0, packed[0]);
+ dst.move(1, packed[1]);
+ dst.move(2, SIMD::Int(0));
+ dst.move(3, SIMD::Int(1));
+ break;
+ case VK_FORMAT_R32G32_SFLOAT:
+ dst.move(0, packed[0]);
+ dst.move(1, packed[1]);
+ dst.move(2, SIMD::Float(0));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+ dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
+ dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
+ dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
+ dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
+ break;
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+ dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+ dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+ dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
+ break;
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
+ dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(3, SIMD::Float(1));
+ break;
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
+ break;
+ default:
+ UNIMPLEMENTED("VkFormat %d", int(vkFormat));
+ break;
}
return EmitResult::Continue;
@@ -852,103 +852,106 @@
auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
switch(format)
{
- case spv::ImageFormatRgba32f:
- case spv::ImageFormatRgba32i:
- case spv::ImageFormatRgba32ui:
- texelSize = 16;
- packed[0] = texel.Int(0);
- packed[1] = texel.Int(1);
- packed[2] = texel.Int(2);
- packed[3] = texel.Int(3);
- numPackedElements = 4;
- break;
- case spv::ImageFormatR32f:
- case spv::ImageFormatR32i:
- case spv::ImageFormatR32ui:
- texelSize = 4;
- packed[0] = texel.Int(0);
- numPackedElements = 1;
- break;
- case spv::ImageFormatRgba8:
- texelSize = 4;
- packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
- ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
- ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
- ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
- numPackedElements = 1;
- break;
- case spv::ImageFormatRgba8Snorm:
- texelSize = 4;
- packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) |
- ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) << 8) |
- ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) << 16) |
- ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
- SIMD::Int(0xFF)) << 24);
- numPackedElements = 1;
- break;
- case spv::ImageFormatRgba8i:
- case spv::ImageFormatRgba8ui:
- texelSize = 4;
- packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
- (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
- (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
- (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
- numPackedElements = 1;
- break;
- case spv::ImageFormatRgba16f:
- texelSize = 8;
- packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
- packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
- numPackedElements = 2;
- break;
- case spv::ImageFormatRgba16i:
- case spv::ImageFormatRgba16ui:
- texelSize = 8;
- packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
- packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
- numPackedElements = 2;
- break;
- case spv::ImageFormatRg32f:
- case spv::ImageFormatRg32i:
- case spv::ImageFormatRg32ui:
- texelSize = 8;
- packed[0] = texel.Int(0);
- packed[1] = texel.Int(1);
- numPackedElements = 2;
- break;
+ case spv::ImageFormatRgba32f:
+ case spv::ImageFormatRgba32i:
+ case spv::ImageFormatRgba32ui:
+ texelSize = 16;
+ packed[0] = texel.Int(0);
+ packed[1] = texel.Int(1);
+ packed[2] = texel.Int(2);
+ packed[3] = texel.Int(3);
+ numPackedElements = 4;
+ break;
+ case spv::ImageFormatR32f:
+ case spv::ImageFormatR32i:
+ case spv::ImageFormatR32ui:
+ texelSize = 4;
+ packed[0] = texel.Int(0);
+ numPackedElements = 1;
+ break;
+ case spv::ImageFormatRgba8:
+ texelSize = 4;
+ packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
+ ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
+ ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
+ ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
+ numPackedElements = 1;
+ break;
+ case spv::ImageFormatRgba8Snorm:
+ texelSize = 4;
+ packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF)) |
+ ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF))
+ << 8) |
+ ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF))
+ << 16) |
+ ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF))
+ << 24);
+ numPackedElements = 1;
+ break;
+ case spv::ImageFormatRgba8i:
+ case spv::ImageFormatRgba8ui:
+ texelSize = 4;
+ packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
+ (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
+ (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
+ (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
+ numPackedElements = 1;
+ break;
+ case spv::ImageFormatRgba16f:
+ texelSize = 8;
+ packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
+ packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
+ numPackedElements = 2;
+ break;
+ case spv::ImageFormatRgba16i:
+ case spv::ImageFormatRgba16ui:
+ texelSize = 8;
+ packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
+ packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
+ numPackedElements = 2;
+ break;
+ case spv::ImageFormatRg32f:
+ case spv::ImageFormatRg32i:
+ case spv::ImageFormatRg32ui:
+ texelSize = 8;
+ packed[0] = texel.Int(0);
+ packed[1] = texel.Int(1);
+ numPackedElements = 2;
+ break;
- case spv::ImageFormatRg16f:
- case spv::ImageFormatR11fG11fB10f:
- case spv::ImageFormatR16f:
- case spv::ImageFormatRgba16:
- case spv::ImageFormatRgb10A2:
- case spv::ImageFormatRg16:
- case spv::ImageFormatRg8:
- case spv::ImageFormatR16:
- case spv::ImageFormatR8:
- case spv::ImageFormatRgba16Snorm:
- case spv::ImageFormatRg16Snorm:
- case spv::ImageFormatRg8Snorm:
- case spv::ImageFormatR16Snorm:
- case spv::ImageFormatR8Snorm:
- case spv::ImageFormatRg16i:
- case spv::ImageFormatRg8i:
- case spv::ImageFormatR16i:
- case spv::ImageFormatR8i:
- case spv::ImageFormatRgb10a2ui:
- case spv::ImageFormatRg16ui:
- case spv::ImageFormatRg8ui:
- case spv::ImageFormatR16ui:
- case spv::ImageFormatR8ui:
- UNIMPLEMENTED("spv::ImageFormat %d", int(format));
- break;
+ case spv::ImageFormatRg16f:
+ case spv::ImageFormatR11fG11fB10f:
+ case spv::ImageFormatR16f:
+ case spv::ImageFormatRgba16:
+ case spv::ImageFormatRgb10A2:
+ case spv::ImageFormatRg16:
+ case spv::ImageFormatRg8:
+ case spv::ImageFormatR16:
+ case spv::ImageFormatR8:
+ case spv::ImageFormatRgba16Snorm:
+ case spv::ImageFormatRg16Snorm:
+ case spv::ImageFormatRg8Snorm:
+ case spv::ImageFormatR16Snorm:
+ case spv::ImageFormatR8Snorm:
+ case spv::ImageFormatRg16i:
+ case spv::ImageFormatRg8i:
+ case spv::ImageFormatR16i:
+ case spv::ImageFormatR8i:
+ case spv::ImageFormatRgb10a2ui:
+ case spv::ImageFormatRg16ui:
+ case spv::ImageFormatRg8ui:
+ case spv::ImageFormatR16ui:
+ case spv::ImageFormatR8ui:
+ UNIMPLEMENTED("spv::ImageFormat %d", int(format));
+ break;
- default:
- UNREACHABLE("spv::ImageFormat %d", int(format));
- break;
+ default:
+ UNREACHABLE("spv::ImageFormat %d", int(format));
+ break;
}
auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp
index c172d3c..8df565f 100644
--- a/src/Pipeline/SpirvShaderMemory.cpp
+++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -57,8 +57,7 @@
auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
- VisitMemoryObject(pointerId, [&](const MemoryElement& el)
- {
+ VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
auto p = ptr + el.offset;
if(interleavedByLane) { p = InterleaveByLane(p); } // TODO: Interleave once, then add offset?
dst.move(el.index, p.Load<SIMD::Float>(robustness, state->activeLaneMask(), atomic, memoryOrder));
@@ -101,8 +100,7 @@
{
// Constant source data.
const uint32_t *src = object.constantValue.get();
- VisitMemoryObject(pointerId, [&](const MemoryElement& el)
- {
+ VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
auto p = ptr + el.offset;
if(interleavedByLane) { p = InterleaveByLane(p); }
p.Store(SIMD::Int(src[el.index]), robustness, mask, atomic, memoryOrder);
@@ -112,8 +110,7 @@
{
// Intermediate source data.
auto &src = state->getIntermediate(objectId);
- VisitMemoryObject(pointerId, [&](const MemoryElement& el)
- {
+ VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
auto p = ptr + el.offset;
if(interleavedByLane) { p = InterleaveByLane(p); }
p.Store(src.Float(el.index), robustness, mask, atomic, memoryOrder);
@@ -132,95 +129,95 @@
switch(objectTy.storageClass)
{
- case spv::StorageClassOutput:
- case spv::StorageClassPrivate:
- case spv::StorageClassFunction:
- {
- ASSERT(objectTy.opcode() == spv::OpTypePointer);
- auto base = &routine->getVariable(resultId)[0];
- auto elementTy = getType(objectTy.element);
- auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
- state->createPointer(resultId, SIMD::Pointer(base, size));
- break;
- }
- case spv::StorageClassWorkgroup:
- {
- ASSERT(objectTy.opcode() == spv::OpTypePointer);
- auto base = &routine->workgroupMemory[0];
- auto size = workgroupMemory.size();
- state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
- break;
- }
- case spv::StorageClassInput:
- {
- if(object.kind == Object::Kind::InterfaceVariable)
+ case spv::StorageClassOutput:
+ case spv::StorageClassPrivate:
+ case spv::StorageClassFunction:
{
- auto &dst = routine->getVariable(resultId);
- int offset = 0;
- VisitInterface(resultId,
- [&](Decorations const &d, AttribType type) {
- auto scalarSlot = d.Location << 2 | d.Component;
- dst[offset++] = routine->inputs[scalarSlot];
- });
+ ASSERT(objectTy.opcode() == spv::OpTypePointer);
+ auto base = &routine->getVariable(resultId)[0];
+ auto elementTy = getType(objectTy.element);
+ auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
+ state->createPointer(resultId, SIMD::Pointer(base, size));
+ break;
}
- ASSERT(objectTy.opcode() == spv::OpTypePointer);
- auto base = &routine->getVariable(resultId)[0];
- auto elementTy = getType(objectTy.element);
- auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
- state->createPointer(resultId, SIMD::Pointer(base, size));
- break;
- }
- case spv::StorageClassUniformConstant:
- {
- const auto &d = descriptorDecorations.at(resultId);
- ASSERT(d.DescriptorSet >= 0);
- ASSERT(d.Binding >= 0);
+ case spv::StorageClassWorkgroup:
+ {
+ ASSERT(objectTy.opcode() == spv::OpTypePointer);
+ auto base = &routine->workgroupMemory[0];
+ auto size = workgroupMemory.size();
+ state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
+ break;
+ }
+ case spv::StorageClassInput:
+ {
+ if(object.kind == Object::Kind::InterfaceVariable)
+ {
+ auto &dst = routine->getVariable(resultId);
+ int offset = 0;
+ VisitInterface(resultId,
+ [&](Decorations const &d, AttribType type) {
+ auto scalarSlot = d.Location << 2 | d.Component;
+ dst[offset++] = routine->inputs[scalarSlot];
+ });
+ }
+ ASSERT(objectTy.opcode() == spv::OpTypePointer);
+ auto base = &routine->getVariable(resultId)[0];
+ auto elementTy = getType(objectTy.element);
+ auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
+ state->createPointer(resultId, SIMD::Pointer(base, size));
+ break;
+ }
+ case spv::StorageClassUniformConstant:
+ {
+ const auto &d = descriptorDecorations.at(resultId);
+ ASSERT(d.DescriptorSet >= 0);
+ ASSERT(d.Binding >= 0);
- uint32_t arrayIndex = 0; // TODO(b/129523279)
- auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
- if(setLayout->hasBinding(d.Binding))
- {
- uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex));
- Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
- Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
- auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
- state->createPointer(resultId, SIMD::Pointer(binding, size));
+ uint32_t arrayIndex = 0; // TODO(b/129523279)
+ auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
+ if(setLayout->hasBinding(d.Binding))
+ {
+ uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex));
+ Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
+ Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
+ auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
+ state->createPointer(resultId, SIMD::Pointer(binding, size));
+ }
+ else
+ {
+ // TODO: Error if the variable with the non-existant binding is
+ // used? Or perhaps strip these unused variable declarations as
+ // a preprocess on the SPIR-V?
+ }
+ break;
}
- else
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
{
- // TODO: Error if the variable with the non-existant binding is
- // used? Or perhaps strip these unused variable declarations as
- // a preprocess on the SPIR-V?
+ const auto &d = descriptorDecorations.at(resultId);
+ ASSERT(d.DescriptorSet >= 0);
+ auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
+ // Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number
+ // of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this
+ // is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it.
+ if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
+ {
+ state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
+ }
+ else
+ {
+ state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
+ }
+ break;
}
- break;
- }
- case spv::StorageClassUniform:
- case spv::StorageClassStorageBuffer:
- {
- const auto &d = descriptorDecorations.at(resultId);
- ASSERT(d.DescriptorSet >= 0);
- auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
- // Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number
- // of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this
- // is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it.
- if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
+ case spv::StorageClassPushConstant:
{
- state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
+ state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
+ break;
}
- else
- {
- state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
- }
- break;
- }
- case spv::StorageClassPushConstant:
- {
- state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
- break;
- }
- default:
- UNREACHABLE("Storage class %d", objectTy.storageClass);
- break;
+ default:
+ UNREACHABLE("Storage class %d", objectTy.storageClass);
+ break;
}
if(insn.wordCount() > 4)
@@ -232,24 +229,23 @@
}
switch(objectTy.storageClass)
{
- case spv::StorageClassOutput:
- case spv::StorageClassPrivate:
- case spv::StorageClassFunction:
- {
- bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
- auto ptr = GetPointerToData(resultId, 0, state);
- GenericValue initialValue(this, state, initializerId);
- VisitMemoryObject(resultId, [&](const MemoryElement& el)
+ case spv::StorageClassOutput:
+ case spv::StorageClassPrivate:
+ case spv::StorageClassFunction:
{
- auto p = ptr + el.offset;
- if(interleavedByLane) { p = InterleaveByLane(p); }
- auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
- p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask());
- });
- break;
- }
- default:
- ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
+ bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
+ auto ptr = GetPointerToData(resultId, 0, state);
+ GenericValue initialValue(this, state, initializerId);
+ VisitMemoryObject(resultId, [&](const MemoryElement &el) {
+ auto p = ptr + el.offset;
+ if(interleavedByLane) { p = InterleaveByLane(p); }
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
+ p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask());
+ });
+ break;
+ }
+ default:
+ ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
}
}
@@ -271,10 +267,9 @@
std::unordered_map<uint32_t, uint32_t> srcOffsets;
- VisitMemoryObject(srcPtrId, [&](const MemoryElement& el) { srcOffsets[el.index] = el.offset; });
+ VisitMemoryObject(srcPtrId, [&](const MemoryElement &el) { srcOffsets[el.index] = el.offset; });
- VisitMemoryObject(dstPtrId, [&](const MemoryElement& el)
- {
+ VisitMemoryObject(dstPtrId, [&](const MemoryElement &el) {
auto it = srcOffsets.find(el.index);
ASSERT(it != srcOffsets.end());
auto srcOffset = it->second;
@@ -303,7 +298,7 @@
return EmitResult::Continue;
}
-void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, const MemoryVisitor& f) const
+void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &f) const
{
ApplyDecorationsForId(&d, id);
auto const &type = getType(id);
@@ -316,60 +311,60 @@
switch(type.opcode())
{
- case spv::OpTypePointer:
- VisitMemoryObjectInner(type.definition.word(3), d, index, offset, f);
- break;
- case spv::OpTypeInt:
- case spv::OpTypeFloat:
- case spv::OpTypeRuntimeArray:
- f(MemoryElement{index++, offset, type});
- break;
- case spv::OpTypeVector:
- {
- auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
- for(auto i = 0u; i < type.definition.word(3); i++)
+ case spv::OpTypePointer:
+ VisitMemoryObjectInner(type.definition.word(3), d, index, offset, f);
+ break;
+ case spv::OpTypeInt:
+ case spv::OpTypeFloat:
+ case spv::OpTypeRuntimeArray:
+ f(MemoryElement{ index++, offset, type });
+ break;
+ case spv::OpTypeVector:
{
- VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
+ auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
+ for(auto i = 0u; i < type.definition.word(3); i++)
+ {
+ VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
+ }
+ break;
}
- break;
- }
- case spv::OpTypeMatrix:
- {
- auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
- d.InsideMatrix = true;
- for(auto i = 0u; i < type.definition.word(3); i++)
+ case spv::OpTypeMatrix:
{
- ASSERT(d.HasMatrixStride);
- VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
+ auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
+ d.InsideMatrix = true;
+ for(auto i = 0u; i < type.definition.word(3); i++)
+ {
+ ASSERT(d.HasMatrixStride);
+ VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
+ }
+ break;
}
- break;
- }
- case spv::OpTypeStruct:
- for(auto i = 0u; i < type.definition.wordCount() - 2; i++)
+ case spv::OpTypeStruct:
+ for(auto i = 0u; i < type.definition.wordCount() - 2; i++)
+ {
+ ApplyDecorationsForIdMember(&d, id, i);
+ VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f);
+ }
+ break;
+ case spv::OpTypeArray:
{
- ApplyDecorationsForIdMember(&d, id, i);
- VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f);
+ auto arraySize = GetConstScalarInt(type.definition.word(3));
+ for(auto i = 0u; i < arraySize; i++)
+ {
+ ASSERT(d.HasArrayStride);
+ VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
+ }
+ break;
}
- break;
- case spv::OpTypeArray:
- {
- auto arraySize = GetConstScalarInt(type.definition.word(3));
- for(auto i = 0u; i < arraySize; i++)
- {
- ASSERT(d.HasArrayStride);
- VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
- }
- break;
- }
- default:
- UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
+ default:
+ UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
}
}
-void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, const MemoryVisitor& f) const
+void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, const MemoryVisitor &f) const
{
auto typeId = getObject(id).type;
- auto const & type = getType(typeId);
+ auto const &type = getType(typeId);
if(IsExplicitLayout(type.storageClass))
{
Decorations d{};
@@ -384,7 +379,7 @@
for(auto index = 0u; index < elType.sizeInComponents; index++)
{
auto offset = static_cast<uint32_t>(index * sizeof(float));
- f({index, offset, elType});
+ f({ index, offset, elType });
}
}
}
@@ -411,15 +406,15 @@
ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding));
int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
- Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
- Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
+ Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
+ Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
if(setLayout->isBindingDynamic(d.Binding))
{
uint32_t dynamicBindingIndex =
- routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
- setLayout->getDynamicDescriptorOffset(d.Binding) +
- arrayIndex;
+ routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
+ setLayout->getDynamicDescriptorOffset(d.Binding) +
+ arrayIndex;
Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex];
Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
@@ -439,23 +434,22 @@
std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
{
auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
- spv::MemorySemanticsAcquireMask |
- spv::MemorySemanticsReleaseMask |
- spv::MemorySemanticsAcquireReleaseMask |
- spv::MemorySemanticsSequentiallyConsistentMask
- );
+ spv::MemorySemanticsAcquireMask |
+ spv::MemorySemanticsReleaseMask |
+ spv::MemorySemanticsAcquireReleaseMask |
+ spv::MemorySemanticsSequentiallyConsistentMask);
switch(control)
{
- case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
- case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
- case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
- case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
- case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
- default:
- // "it is invalid for more than one of these four bits to be set:
- // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
- UNREACHABLE("MemorySemanticsMask: %x", int(control));
- return std::memory_order_acq_rel;
+ case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
+ case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
+ case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
+ case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
+ case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
+ default:
+ // "it is invalid for more than one of these four bits to be set:
+ // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
+ UNREACHABLE("MemorySemanticsMask: %x", int(control));
+ return std::memory_order_acq_rel;
}
}
@@ -463,12 +457,12 @@
{
switch(storageClass)
{
- case spv::StorageClassUniform:
- case spv::StorageClassStorageBuffer:
- case spv::StorageClassImage:
- return false;
- default:
- return true;
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ case spv::StorageClassImage:
+ return false;
+ default:
+ return true;
}
}
@@ -476,12 +470,12 @@
{
switch(storageClass)
{
- case spv::StorageClassUniform:
- case spv::StorageClassStorageBuffer:
- case spv::StorageClassPushConstant:
- return true;
- default:
- return false;
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ case spv::StorageClassPushConstant:
+ return true;
+ default:
+ return false;
}
}
@@ -499,14 +493,14 @@
{
switch(storageClass)
{
- case spv::StorageClassUniform:
- case spv::StorageClassStorageBuffer:
- case spv::StorageClassPushConstant:
- case spv::StorageClassWorkgroup:
- case spv::StorageClassImage:
- return false;
- default:
- return true;
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ case spv::StorageClassPushConstant:
+ case spv::StorageClassWorkgroup:
+ case spv::StorageClassImage:
+ return false;
+ default:
+ return true;
}
}
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 1d9694a..45deba6 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -14,14 +14,14 @@
#include "SpirvShader.hpp"
-#include "SamplerCore.hpp" // TODO: Figure out what's needed.
+#include "SamplerCore.hpp" // TODO: Figure out what's needed.
+#include "Device/Config.hpp"
#include "System/Math.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkDevice.hpp"
#include "Vulkan/VkImageView.hpp"
#include "Vulkan/VkSampler.hpp"
-#include "Device/Config.hpp"
#include <spirv/unified1/spirv.hpp>
@@ -36,22 +36,22 @@
const auto samplerId = sampler ? sampler->id : 0;
ASSERT(imageDescriptor->imageViewId != 0 && (samplerId != 0 || instruction.samplerMethod == Fetch));
- vk::Device::SamplingRoutineCache::Key key = {inst, imageDescriptor->imageViewId, samplerId};
+ vk::Device::SamplingRoutineCache::Key key = { inst, imageDescriptor->imageViewId, samplerId };
ASSERT(imageDescriptor->device);
if(auto routine = imageDescriptor->device->findInConstCache(key))
{
- return (ImageSampler*)(routine->getEntry());
+ return (ImageSampler *)(routine->getEntry());
}
std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex());
- vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache();
+ vk::Device::SamplingRoutineCache *cache = imageDescriptor->device->getSamplingRoutineCache();
auto routine = cache->query(key);
if(routine)
{
- return (ImageSampler*)(routine->getEntry());
+ return (ImageSampler *)(routine->getEntry());
}
auto type = imageDescriptor->type;
@@ -69,9 +69,9 @@
samplerState.swizzle = imageDescriptor->swizzle;
samplerState.gatherComponent = instruction.gatherComponent;
samplerState.highPrecisionFiltering = false;
- samplerState.largeTexture = (imageDescriptor->extent.width > SHRT_MAX) ||
+ samplerState.largeTexture = (imageDescriptor->extent.width > SHRT_MAX) ||
(imageDescriptor->extent.height > SHRT_MAX) ||
- (imageDescriptor->extent.depth > SHRT_MAX);
+ (imageDescriptor->extent.depth > SHRT_MAX);
if(sampler)
{
@@ -100,7 +100,7 @@
routine = emitSamplerRoutine(instruction, samplerState);
cache->add(key, routine);
- return (ImageSampler*)(routine->getEntry());
+ return (ImageSampler *)(routine->getEntry());
}
std::shared_ptr<rr::Routine> SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
@@ -114,17 +114,17 @@
Pointer<SIMD::Float> out = function.Arg<3>();
Pointer<Byte> constants = function.Arg<4>();
- SIMD::Float uvw[4] = {0, 0, 0, 0};
+ SIMD::Float uvw[4] = { 0, 0, 0, 0 };
SIMD::Float q = 0;
SIMD::Float lodOrBias = 0; // Explicit level-of-detail, or bias added to the implicit level-of-detail (depending on samplerMethod).
- Vector4f dsx = {0, 0, 0, 0};
- Vector4f dsy = {0, 0, 0, 0};
- Vector4f offset = {0, 0, 0, 0};
+ Vector4f dsx = { 0, 0, 0, 0 };
+ Vector4f dsy = { 0, 0, 0, 0 };
+ Vector4f offset = { 0, 0, 0, 0 };
SIMD::Int sampleId = 0;
SamplerFunction samplerFunction = instruction.getSamplerFunction();
uint32_t i = 0;
- for( ; i < instruction.coordinates; i++)
+ for(; i < instruction.coordinates; i++)
{
uvw[i] = in[i];
}
@@ -231,28 +231,28 @@
{
switch(sampler->magFilter)
{
- case VK_FILTER_NEAREST:
- switch(sampler->minFilter)
- {
- case VK_FILTER_NEAREST: return FILTER_POINT;
- case VK_FILTER_LINEAR: return FILTER_MIN_LINEAR_MAG_POINT;
+ case VK_FILTER_NEAREST:
+ switch(sampler->minFilter)
+ {
+ case VK_FILTER_NEAREST: return FILTER_POINT;
+ case VK_FILTER_LINEAR: return FILTER_MIN_LINEAR_MAG_POINT;
+ default:
+ UNIMPLEMENTED("minFilter %d", sampler->minFilter);
+ return FILTER_POINT;
+ }
+ break;
+ case VK_FILTER_LINEAR:
+ switch(sampler->minFilter)
+ {
+ case VK_FILTER_NEAREST: return FILTER_MIN_POINT_MAG_LINEAR;
+ case VK_FILTER_LINEAR: return FILTER_LINEAR;
+ default:
+ UNIMPLEMENTED("minFilter %d", sampler->minFilter);
+ return FILTER_POINT;
+ }
+ break;
default:
- UNIMPLEMENTED("minFilter %d", sampler->minFilter);
- return FILTER_POINT;
- }
- break;
- case VK_FILTER_LINEAR:
- switch(sampler->minFilter)
- {
- case VK_FILTER_NEAREST: return FILTER_MIN_POINT_MAG_LINEAR;
- case VK_FILTER_LINEAR: return FILTER_LINEAR;
- default:
- UNIMPLEMENTED("minFilter %d", sampler->minFilter);
- return FILTER_POINT;
- }
- break;
- default:
- break;
+ break;
}
UNIMPLEMENTED("magFilter %d", sampler->magFilter);
@@ -273,11 +273,11 @@
switch(sampler->mipmapMode)
{
- case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT;
- case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MIPMAP_LINEAR;
- default:
- UNIMPLEMENTED("mipmapMode %d", sampler->mipmapMode);
- return MIPMAP_POINT;
+ case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT;
+ case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MIPMAP_LINEAR;
+ default:
+ UNIMPLEMENTED("mipmapMode %d", sampler->mipmapMode);
+ return MIPMAP_POINT;
}
}
@@ -285,77 +285,77 @@
{
switch(imageViewType)
{
- case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
- if(coordinateIndex == 3)
- {
- return ADDRESSING_LAYER;
- }
- // Fall through to CUBE case:
- case VK_IMAGE_VIEW_TYPE_CUBE:
- if(coordinateIndex <= 1) // Cube faces themselves are addressed as 2D images.
- {
- // Vulkan 1.1 spec:
- // "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then
- // VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE is used, and if VK_FILTER_LINEAR is used within a mip level then sampling at the edges
- // is performed as described earlier in the Cube map edge handling section."
- // This corresponds with our 'SEAMLESS' addressing mode.
- return ADDRESSING_SEAMLESS;
- }
- else if(coordinateIndex == 2)
- {
- // The cube face is an index into array layers.
- return ADDRESSING_CUBEFACE;
- }
- else
- {
- return ADDRESSING_UNUSED;
- }
- break;
+ case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+ if(coordinateIndex == 3)
+ {
+ return ADDRESSING_LAYER;
+ }
+ // Fall through to CUBE case:
+ case VK_IMAGE_VIEW_TYPE_CUBE:
+ if(coordinateIndex <= 1) // Cube faces themselves are addressed as 2D images.
+ {
+ // Vulkan 1.1 spec:
+ // "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then
+ // VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE is used, and if VK_FILTER_LINEAR is used within a mip level then sampling at the edges
+ // is performed as described earlier in the Cube map edge handling section."
+ // This corresponds with our 'SEAMLESS' addressing mode.
+ return ADDRESSING_SEAMLESS;
+ }
+ else if(coordinateIndex == 2)
+ {
+ // The cube face is an index into array layers.
+ return ADDRESSING_CUBEFACE;
+ }
+ else
+ {
+ return ADDRESSING_UNUSED;
+ }
+ break;
- case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
- if(coordinateIndex == 1)
- {
+ case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
+ if(coordinateIndex == 1)
+ {
+ return ADDRESSING_WRAP;
+ }
+ else if(coordinateIndex >= 2)
+ {
+ return ADDRESSING_UNUSED;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_3D:
+ if(coordinateIndex >= 3)
+ {
+ return ADDRESSING_UNUSED;
+ }
+ break;
+
+ case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
+ if(coordinateIndex == 1)
+ {
+ return ADDRESSING_WRAP;
+ }
+ // Fall through to 2D_ARRAY case:
+ case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+ if(coordinateIndex == 2)
+ {
+ return ADDRESSING_LAYER;
+ }
+ else if(coordinateIndex >= 3)
+ {
+ return ADDRESSING_UNUSED;
+ }
+ // Fall through to 2D case:
+ case VK_IMAGE_VIEW_TYPE_2D:
+ if(coordinateIndex >= 2)
+ {
+ return ADDRESSING_UNUSED;
+ }
+ break;
+
+ default:
+ UNIMPLEMENTED("imageViewType %d", imageViewType);
return ADDRESSING_WRAP;
- }
- else if(coordinateIndex >= 2)
- {
- return ADDRESSING_UNUSED;
- }
- break;
-
- case VK_IMAGE_VIEW_TYPE_3D:
- if(coordinateIndex >= 3)
- {
- return ADDRESSING_UNUSED;
- }
- break;
-
- case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. TODO(b/134669567)
- if(coordinateIndex == 1)
- {
- return ADDRESSING_WRAP;
- }
- // Fall through to 2D_ARRAY case:
- case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
- if(coordinateIndex == 2)
- {
- return ADDRESSING_LAYER;
- }
- else if(coordinateIndex >= 3)
- {
- return ADDRESSING_UNUSED;
- }
- // Fall through to 2D case:
- case VK_IMAGE_VIEW_TYPE_2D:
- if(coordinateIndex >= 2)
- {
- return ADDRESSING_UNUSED;
- }
- break;
-
- default:
- UNIMPLEMENTED("imageViewType %d", imageViewType);
- return ADDRESSING_WRAP;
}
if(!sampler)
@@ -373,23 +373,23 @@
VkSamplerAddressMode addressMode = VK_SAMPLER_ADDRESS_MODE_REPEAT;
switch(coordinateIndex)
{
- case 0: addressMode = sampler->addressModeU; break;
- case 1: addressMode = sampler->addressModeV; break;
- case 2: addressMode = sampler->addressModeW; break;
- default: UNSUPPORTED("coordinateIndex: %d", coordinateIndex);
+ case 0: addressMode = sampler->addressModeU; break;
+ case 1: addressMode = sampler->addressModeV; break;
+ case 2: addressMode = sampler->addressModeW; break;
+ default: UNSUPPORTED("coordinateIndex: %d", coordinateIndex);
}
switch(addressMode)
{
- case VK_SAMPLER_ADDRESS_MODE_REPEAT: return ADDRESSING_WRAP;
- case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return ADDRESSING_MIRROR;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return ADDRESSING_CLAMP;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return ADDRESSING_BORDER;
- case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return ADDRESSING_MIRRORONCE;
- default:
- UNIMPLEMENTED("addressMode %d", addressMode);
- return ADDRESSING_WRAP;
+ case VK_SAMPLER_ADDRESS_MODE_REPEAT: return ADDRESSING_WRAP;
+ case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return ADDRESSING_MIRROR;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return ADDRESSING_CLAMP;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return ADDRESSING_BORDER;
+ case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return ADDRESSING_MIRRORONCE;
+ default:
+ UNIMPLEMENTED("addressMode %d", addressMode);
+ return ADDRESSING_WRAP;
}
}
-} // namespace sw
+} // namespace sw
diff --git a/src/Pipeline/SpirvShaderSpec.cpp b/src/Pipeline/SpirvShaderSpec.cpp
index 2d3c402..3bfd153 100644
--- a/src/Pipeline/SpirvShaderSpec.cpp
+++ b/src/Pipeline/SpirvShaderSpec.cpp
@@ -24,132 +24,132 @@
switch(opcode)
{
- case spv::OpIAdd:
- case spv::OpISub:
- case spv::OpIMul:
- case spv::OpUDiv:
- case spv::OpSDiv:
- case spv::OpUMod:
- case spv::OpSMod:
- case spv::OpSRem:
- case spv::OpShiftRightLogical:
- case spv::OpShiftRightArithmetic:
- case spv::OpShiftLeftLogical:
- case spv::OpBitwiseOr:
- case spv::OpLogicalOr:
- case spv::OpBitwiseAnd:
- case spv::OpLogicalAnd:
- case spv::OpBitwiseXor:
- case spv::OpLogicalEqual:
- case spv::OpIEqual:
- case spv::OpLogicalNotEqual:
- case spv::OpINotEqual:
- case spv::OpULessThan:
- case spv::OpSLessThan:
- case spv::OpUGreaterThan:
- case spv::OpSGreaterThan:
- case spv::OpULessThanEqual:
- case spv::OpSLessThanEqual:
- case spv::OpUGreaterThanEqual:
- case spv::OpSGreaterThanEqual:
- EvalSpecConstantBinaryOp(insn);
- break;
+ case spv::OpIAdd:
+ case spv::OpISub:
+ case spv::OpIMul:
+ case spv::OpUDiv:
+ case spv::OpSDiv:
+ case spv::OpUMod:
+ case spv::OpSMod:
+ case spv::OpSRem:
+ case spv::OpShiftRightLogical:
+ case spv::OpShiftRightArithmetic:
+ case spv::OpShiftLeftLogical:
+ case spv::OpBitwiseOr:
+ case spv::OpLogicalOr:
+ case spv::OpBitwiseAnd:
+ case spv::OpLogicalAnd:
+ case spv::OpBitwiseXor:
+ case spv::OpLogicalEqual:
+ case spv::OpIEqual:
+ case spv::OpLogicalNotEqual:
+ case spv::OpINotEqual:
+ case spv::OpULessThan:
+ case spv::OpSLessThan:
+ case spv::OpUGreaterThan:
+ case spv::OpSGreaterThan:
+ case spv::OpULessThanEqual:
+ case spv::OpSLessThanEqual:
+ case spv::OpUGreaterThanEqual:
+ case spv::OpSGreaterThanEqual:
+ EvalSpecConstantBinaryOp(insn);
+ break;
- case spv::OpSConvert:
- case spv::OpFConvert:
- case spv::OpUConvert:
- case spv::OpSNegate:
- case spv::OpNot:
- case spv::OpLogicalNot:
- case spv::OpQuantizeToF16:
- EvalSpecConstantUnaryOp(insn);
- break;
+ case spv::OpSConvert:
+ case spv::OpFConvert:
+ case spv::OpUConvert:
+ case spv::OpSNegate:
+ case spv::OpNot:
+ case spv::OpLogicalNot:
+ case spv::OpQuantizeToF16:
+ EvalSpecConstantUnaryOp(insn);
+ break;
- case spv::OpSelect:
- {
- auto &result = CreateConstant(insn);
- auto const &cond = getObject(insn.word(4));
- auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
- auto const &left = getObject(insn.word(5));
- auto const &right = getObject(insn.word(6));
-
- for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
+ case spv::OpSelect:
{
- auto sel = cond.constantValue[condIsScalar ? 0 : i];
- result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i];
- }
- break;
- }
+ auto &result = CreateConstant(insn);
+ auto const &cond = getObject(insn.word(4));
+ auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
+ auto const &left = getObject(insn.word(5));
+ auto const &right = getObject(insn.word(6));
- case spv::OpCompositeExtract:
- {
- auto &result = CreateConstant(insn);
- auto const &compositeObject = getObject(insn.word(4));
- auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
-
- for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
- {
- result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
- }
- break;
- }
-
- case spv::OpCompositeInsert:
- {
- auto &result = CreateConstant(insn);
- auto const &newPart = getObject(insn.word(4));
- auto const &oldObject = getObject(insn.word(5));
- auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
-
- // old components before
- for(auto i = 0u; i < firstNewComponent; i++)
- {
- result.constantValue[i] = oldObject.constantValue[i];
- }
- // new part
- for(auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
- {
- result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
- }
- // old components after
- for(auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
- {
- result.constantValue[i] = oldObject.constantValue[i];
- }
- break;
- }
-
- case spv::OpVectorShuffle:
- {
- auto &result = CreateConstant(insn);
- auto const &firstHalf = getObject(insn.word(4));
- auto const &secondHalf = getObject(insn.word(5));
-
- for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
- {
- auto selector = insn.word(6 + i);
- if(selector == static_cast<uint32_t>(-1))
+ for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
{
- // Undefined value, we'll use zero
- result.constantValue[i] = 0;
+ auto sel = cond.constantValue[condIsScalar ? 0 : i];
+ result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i];
}
- else if(selector < getType(firstHalf.type).sizeInComponents)
- {
- result.constantValue[i] = firstHalf.constantValue[selector];
- }
- else
- {
- result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
- }
+ break;
}
- break;
- }
- default:
- // Other spec constant ops are possible, but require capabilities that are
- // not exposed in our Vulkan implementation (eg Kernel), so we should never
- // get here for correct shaders.
- UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
+ case spv::OpCompositeExtract:
+ {
+ auto &result = CreateConstant(insn);
+ auto const &compositeObject = getObject(insn.word(4));
+ auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
+
+ for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
+ {
+ result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
+ }
+ break;
+ }
+
+ case spv::OpCompositeInsert:
+ {
+ auto &result = CreateConstant(insn);
+ auto const &newPart = getObject(insn.word(4));
+ auto const &oldObject = getObject(insn.word(5));
+ auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
+
+ // old components before
+ for(auto i = 0u; i < firstNewComponent; i++)
+ {
+ result.constantValue[i] = oldObject.constantValue[i];
+ }
+ // new part
+ for(auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
+ {
+ result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
+ }
+ // old components after
+ for(auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
+ {
+ result.constantValue[i] = oldObject.constantValue[i];
+ }
+ break;
+ }
+
+ case spv::OpVectorShuffle:
+ {
+ auto &result = CreateConstant(insn);
+ auto const &firstHalf = getObject(insn.word(4));
+ auto const &secondHalf = getObject(insn.word(5));
+
+ for(auto i = 0u; i < getType(result.type).sizeInComponents; i++)
+ {
+ auto selector = insn.word(6 + i);
+ if(selector == static_cast<uint32_t>(-1))
+ {
+ // Undefined value, we'll use zero
+ result.constantValue[i] = 0;
+ }
+ else if(selector < getType(firstHalf.type).sizeInComponents)
+ {
+ result.constantValue[i] = firstHalf.constantValue[selector];
+ }
+ else
+ {
+ result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
+ }
+ }
+ break;
+ }
+
+ default:
+ // Other spec constant ops are possible, but require capabilities that are
+ // not exposed in our Vulkan implementation (eg Kernel), so we should never
+ // get here for correct shaders.
+ UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
}
}
@@ -168,37 +168,37 @@
switch(opcode)
{
- case spv::OpSConvert:
- case spv::OpFConvert:
- case spv::OpUConvert:
- UNREACHABLE("Not possible until we have multiple bit widths");
- break;
+ case spv::OpSConvert:
+ case spv::OpFConvert:
+ case spv::OpUConvert:
+ UNREACHABLE("Not possible until we have multiple bit widths");
+ break;
- case spv::OpSNegate:
- v = -(int)l;
- break;
- case spv::OpNot:
- case spv::OpLogicalNot:
- v = ~l;
- break;
+ case spv::OpSNegate:
+ v = -(int)l;
+ break;
+ case spv::OpNot:
+ case spv::OpLogicalNot:
+ v = ~l;
+ break;
- case spv::OpQuantizeToF16:
- {
- // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
- auto abs = bit_cast<float>(l & 0x7FFFFFFF);
- auto sign = l & 0x80000000;
- auto isZero = abs < 0.000061035f ? ~0u : 0u;
- auto isInf = abs > 65504.0f ? ~0u : 0u;
- auto isNaN = (abs != abs) ? ~0u : 0u;
- auto isInfOrNan = isInf | isNaN;
- v = l & 0xFFFFE000;
- v &= ~isZero | 0x80000000;
- v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
- v |= isNaN & 0x400000;
- break;
- }
- default:
- UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
+ case spv::OpQuantizeToF16:
+ {
+ // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
+ auto abs = bit_cast<float>(l & 0x7FFFFFFF);
+ auto sign = l & 0x80000000;
+ auto isZero = abs < 0.000061035f ? ~0u : 0u;
+ auto isInf = abs > 65504.0f ? ~0u : 0u;
+ auto isNaN = (abs != abs) ? ~0u : 0u;
+ auto isInfOrNan = isInf | isNaN;
+ v = l & 0xFFFFE000;
+ v &= ~isZero | 0x80000000;
+ v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
+ v |= isNaN & 0x400000;
+ break;
+ }
+ default:
+ UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
}
}
}
@@ -220,93 +220,93 @@
switch(opcode)
{
- case spv::OpIAdd:
- v = l + r;
- break;
- case spv::OpISub:
- v = l - r;
- break;
- case spv::OpIMul:
- v = l * r;
- break;
- case spv::OpUDiv:
- v = (r == 0) ? 0 : l / r;
- break;
- case spv::OpUMod:
- v = (r == 0) ? 0 : l % r;
- break;
- case spv::OpSDiv:
- if(r == 0) r = UINT32_MAX;
- if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
- v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
- break;
- case spv::OpSRem:
- if(r == 0) r = UINT32_MAX;
- if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
- v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
- break;
- case spv::OpSMod:
- if(r == 0) r = UINT32_MAX;
- if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
- // Test if a signed-multiply would be negative.
- v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
- if((v & 0x80000000) != (r & 0x80000000))
- v += r;
- break;
- case spv::OpShiftRightLogical:
- v = l >> r;
- break;
- case spv::OpShiftRightArithmetic:
- v = static_cast<int32_t>(l) >> r;
- break;
- case spv::OpShiftLeftLogical:
- v = l << r;
- break;
- case spv::OpBitwiseOr:
- case spv::OpLogicalOr:
- v = l | r;
- break;
- case spv::OpBitwiseAnd:
- case spv::OpLogicalAnd:
- v = l & r;
- break;
- case spv::OpBitwiseXor:
- v = l ^ r;
- break;
- case spv::OpLogicalEqual:
- case spv::OpIEqual:
- v = (l == r) ? ~0u : 0u;
- break;
- case spv::OpLogicalNotEqual:
- case spv::OpINotEqual:
- v = (l != r) ? ~0u : 0u;
- break;
- case spv::OpULessThan:
- v = l < r ? ~0u : 0u;
- break;
- case spv::OpSLessThan:
- v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
- break;
- case spv::OpUGreaterThan:
- v = l > r ? ~0u : 0u;
- break;
- case spv::OpSGreaterThan:
- v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
- break;
- case spv::OpULessThanEqual:
- v = l <= r ? ~0u : 0u;
- break;
- case spv::OpSLessThanEqual:
- v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
- break;
- case spv::OpUGreaterThanEqual:
- v = l >= r ? ~0u : 0u;
- break;
- case spv::OpSGreaterThanEqual:
- v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
- break;
- default:
- UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
+ case spv::OpIAdd:
+ v = l + r;
+ break;
+ case spv::OpISub:
+ v = l - r;
+ break;
+ case spv::OpIMul:
+ v = l * r;
+ break;
+ case spv::OpUDiv:
+ v = (r == 0) ? 0 : l / r;
+ break;
+ case spv::OpUMod:
+ v = (r == 0) ? 0 : l % r;
+ break;
+ case spv::OpSDiv:
+ if(r == 0) r = UINT32_MAX;
+ if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
+ v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
+ break;
+ case spv::OpSRem:
+ if(r == 0) r = UINT32_MAX;
+ if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
+ v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
+ break;
+ case spv::OpSMod:
+ if(r == 0) r = UINT32_MAX;
+ if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
+ // Test if a signed-multiply would be negative.
+ v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
+ if((v & 0x80000000) != (r & 0x80000000))
+ v += r;
+ break;
+ case spv::OpShiftRightLogical:
+ v = l >> r;
+ break;
+ case spv::OpShiftRightArithmetic:
+ v = static_cast<int32_t>(l) >> r;
+ break;
+ case spv::OpShiftLeftLogical:
+ v = l << r;
+ break;
+ case spv::OpBitwiseOr:
+ case spv::OpLogicalOr:
+ v = l | r;
+ break;
+ case spv::OpBitwiseAnd:
+ case spv::OpLogicalAnd:
+ v = l & r;
+ break;
+ case spv::OpBitwiseXor:
+ v = l ^ r;
+ break;
+ case spv::OpLogicalEqual:
+ case spv::OpIEqual:
+ v = (l == r) ? ~0u : 0u;
+ break;
+ case spv::OpLogicalNotEqual:
+ case spv::OpINotEqual:
+ v = (l != r) ? ~0u : 0u;
+ break;
+ case spv::OpULessThan:
+ v = l < r ? ~0u : 0u;
+ break;
+ case spv::OpSLessThan:
+ v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
+ break;
+ case spv::OpUGreaterThan:
+ v = l > r ? ~0u : 0u;
+ break;
+ case spv::OpSGreaterThan:
+ v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
+ break;
+ case spv::OpULessThanEqual:
+ v = l <= r ? ~0u : 0u;
+ break;
+ case spv::OpSLessThanEqual:
+ v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
+ break;
+ case spv::OpUGreaterThanEqual:
+ v = l >= r ? ~0u : 0u;
+ break;
+ case spv::OpSGreaterThanEqual:
+ v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
+ break;
+ default:
+ UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
}
}
}
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index b2a731a..615bcf3 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -25,30 +25,27 @@
namespace sw {
VertexProgram::VertexProgram(
- const VertexProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader,
- const vk::DescriptorSet::Bindings &descriptorSets)
- : VertexRoutine(state, pipelineLayout, spirvShader),
- descriptorSets(descriptorSets)
+ const VertexProcessor::State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader,
+ const vk::DescriptorSet::Bindings &descriptorSets)
+ : VertexRoutine(state, pipelineLayout, spirvShader)
+ , descriptorSets(descriptorSets)
{
routine.setImmutableInputBuiltins(spirvShader);
- routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID)))));
});
- routine.setInputBuiltin(spirvShader, spv::BuiltInInstanceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInInstanceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
// TODO: we could do better here; we know InstanceIndex is uniform across all lanes
assert(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID)))));
});
- routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
- {
+ routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
});
@@ -63,7 +60,7 @@
{
}
-void VertexProgram::program(Pointer<UInt> &batch, UInt& vertexCount)
+void VertexProgram::program(Pointer<UInt> &batch, UInt &vertexCount)
{
auto it = spirvShader->inputBuiltins.find(spv::BuiltInVertexIndex);
if(it != spirvShader->inputBuiltins.end())
@@ -71,8 +68,8 @@
assert(it->second.SizeInComponents == 1);
routine.getVariable(it->second.Id)[it->second.FirstComponent] =
- As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) +
- Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex))));
+ As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) +
+ Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex))));
}
auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
@@ -82,4 +79,4 @@
spirvShader->emitEpilog(&routine);
}
-} // namepsace sw
+} // namespace sw
diff --git a/src/Pipeline/VertexProgram.hpp b/src/Pipeline/VertexProgram.hpp
index 9a14713..6abb727 100644
--- a/src/Pipeline/VertexProgram.hpp
+++ b/src/Pipeline/VertexProgram.hpp
@@ -15,8 +15,8 @@
#ifndef sw_VertexProgram_hpp
#define sw_VertexProgram_hpp
-#include "VertexRoutine.hpp"
#include "ShaderCore.hpp"
+#include "VertexRoutine.hpp"
namespace sw {
@@ -26,19 +26,19 @@
{
public:
VertexProgram(
- const VertexProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader,
- const vk::DescriptorSet::Bindings &descriptorSets);
+ const VertexProcessor::State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader,
+ const vk::DescriptorSet::Bindings &descriptorSets);
virtual ~VertexProgram();
private:
- void program(Pointer<UInt> &batch, UInt& vertexCount) override;
+ void program(Pointer<UInt> &batch, UInt &vertexCount) override;
const vk::DescriptorSet::Bindings &descriptorSets;
};
-} // namepsace sw
+} // namespace sw
-#endif // sw_VertexProgram_hpp
+#endif // sw_VertexProgram_hpp
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index 85c395a..517ac70 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -16,20 +16,20 @@
#include "Constants.hpp"
#include "SpirvShader.hpp"
-#include "Device/Vertex.hpp"
#include "Device/Renderer.hpp"
-#include "Vulkan/VkDebug.hpp"
+#include "Device/Vertex.hpp"
#include "System/Half.hpp"
+#include "Vulkan/VkDebug.hpp"
namespace sw {
VertexRoutine::VertexRoutine(
- const VertexProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader)
- : routine(pipelineLayout),
- state(state),
- spirvShader(spirvShader)
+ const VertexProcessor::State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader)
+ : routine(pipelineLayout)
+ , state(state)
+ , spirvShader(spirvShader)
{
spirvShader->emitProlog(&routine);
}
@@ -40,13 +40,13 @@
void VertexRoutine::generate()
{
- Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
- Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
- Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag));
+ Pointer<Byte> cache = task + OFFSET(VertexTask, vertexCache);
+ Pointer<Byte> vertexCache = cache + OFFSET(VertexCache, vertex);
+ Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache, tag));
- UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
+ UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask, vertexCount));
- constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
+ constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants));
// Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer.
// On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache
@@ -93,7 +93,7 @@
spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{
- Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4));
+ Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i / 4));
UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
UInt robustnessSize(0);
@@ -129,19 +129,19 @@
Int4 minY = CmpNLE(-posW, posY);
Int4 minZ = CmpNLE(Float4(0.0f), posZ);
- clipFlags = Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)];
- clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)];
- clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)];
- clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)];
- clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)];
- clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)];
+ clipFlags = Pointer<Int>(constants + OFFSET(Constants, maxX))[SignMask(maxX)];
+ clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxY))[SignMask(maxY)];
+ clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxZ))[SignMask(maxZ)];
+ clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minX))[SignMask(minX)];
+ clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minY))[SignMask(minY)];
+ clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minZ))[SignMask(minZ)];
- Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+ Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
+ Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
+ Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
Int4 finiteXYZ = finiteX & finiteY & finiteZ;
- clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
+ clipFlags |= Pointer<Int>(constants + OFFSET(Constants, fini))[SignMask(finiteXYZ)];
}
void VertexRoutine::computeCullMask()
@@ -162,7 +162,7 @@
}
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
- bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
+ bool robustBufferAccess, UInt &robustnessSize, Int baseVertex)
{
Vector4f v;
// Because of the following rule in the Vulkan spec, we do not care if a very large negative
@@ -193,7 +193,7 @@
switch(stream.type)
{
- case STREAMTYPE_FLOAT:
+ case STREAMTYPE_FLOAT:
{
if(stream.count == 0)
{
@@ -220,86 +220,86 @@
switch(stream.attribType)
{
- case SpirvShader::ATTRIBTYPE_INT:
- if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
- if(stream.count >= 2) v.x = As<Float4>(Int4(v.y));
- if(stream.count >= 3) v.x = As<Float4>(Int4(v.z));
- if(stream.count >= 4) v.x = As<Float4>(Int4(v.w));
- break;
- case SpirvShader::ATTRIBTYPE_UINT:
- if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
- if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y));
- if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z));
- if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w));
- break;
- default:
- break;
+ case SpirvShader::ATTRIBTYPE_INT:
+ if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
+ if(stream.count >= 2) v.x = As<Float4>(Int4(v.y));
+ if(stream.count >= 3) v.x = As<Float4>(Int4(v.z));
+ if(stream.count >= 4) v.x = As<Float4>(Int4(v.w));
+ break;
+ case SpirvShader::ATTRIBTYPE_UINT:
+ if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
+ if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y));
+ if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z));
+ if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w));
+ break;
+ default:
+ break;
}
}
}
break;
- case STREAMTYPE_BYTE:
- if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
- {
- v.x = Float4(*Pointer<Byte4>(source0));
- v.y = Float4(*Pointer<Byte4>(source1));
- v.z = Float4(*Pointer<Byte4>(source2));
- v.w = Float4(*Pointer<Byte4>(source3));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
- if(stream.normalized)
+ case STREAMTYPE_BYTE:
+ if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
{
- if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
- if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
- if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
- if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
+ v.x = Float4(*Pointer<Byte4>(source0));
+ v.y = Float4(*Pointer<Byte4>(source1));
+ v.z = Float4(*Pointer<Byte4>(source2));
+ v.w = Float4(*Pointer<Byte4>(source3));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+ if(stream.normalized)
+ {
+ if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ }
}
- }
- else // Stream: UByte, Shader attrib: Int / UInt
- {
- v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
- v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
- v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
- v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
- break;
- case STREAMTYPE_SBYTE:
- if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
- {
- v.x = Float4(*Pointer<SByte4>(source0));
- v.y = Float4(*Pointer<SByte4>(source1));
- v.z = Float4(*Pointer<SByte4>(source2));
- v.w = Float4(*Pointer<SByte4>(source3));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
- if(stream.normalized)
+ else // Stream: UByte, Shader attrib: Int / UInt
{
- if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
- if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
- if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
- if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
- }
- }
- else // Stream: SByte, Shader attrib: Int / UInt
- {
- v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
- v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
- v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
- v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
+ v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
- break;
- case STREAMTYPE_COLOR:
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
+ break;
+ case STREAMTYPE_SBYTE:
+ if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
+ {
+ v.x = Float4(*Pointer<SByte4>(source0));
+ v.y = Float4(*Pointer<SByte4>(source1));
+ v.z = Float4(*Pointer<SByte4>(source2));
+ v.w = Float4(*Pointer<SByte4>(source3));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+ if(stream.normalized)
+ {
+ if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+ if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+ if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+ if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+ }
+ }
+ else // Stream: SByte, Shader attrib: Int / UInt
+ {
+ v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
+ break;
+ case STREAMTYPE_COLOR:
{
- v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
- v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
- v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
- v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
+ v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+ v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
transpose4x4(v.x, v.y, v.z, v.w);
@@ -309,119 +309,119 @@
v.z = t;
}
break;
- case STREAMTYPE_SHORT:
- if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
- {
- v.x = Float4(*Pointer<Short4>(source0));
- v.y = Float4(*Pointer<Short4>(source1));
- v.z = Float4(*Pointer<Short4>(source2));
- v.w = Float4(*Pointer<Short4>(source3));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
- if(stream.normalized)
+ case STREAMTYPE_SHORT:
+ if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
{
- if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
- if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
- if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
- if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
+ v.x = Float4(*Pointer<Short4>(source0));
+ v.y = Float4(*Pointer<Short4>(source1));
+ v.z = Float4(*Pointer<Short4>(source2));
+ v.w = Float4(*Pointer<Short4>(source3));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+ if(stream.normalized)
+ {
+ if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+ if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+ if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+ if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+ }
}
- }
- else // Stream: Short, Shader attrib: Int/UInt, no type conversion
- {
- v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
- v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
- v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
- v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
- break;
- case STREAMTYPE_USHORT:
- if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
- {
- v.x = Float4(*Pointer<UShort4>(source0));
- v.y = Float4(*Pointer<UShort4>(source1));
- v.z = Float4(*Pointer<UShort4>(source2));
- v.w = Float4(*Pointer<UShort4>(source3));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
- if(stream.normalized)
+ else // Stream: Short, Shader attrib: Int/UInt, no type conversion
{
- if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
- if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
- if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
- if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
+ v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
}
- }
- else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
- {
- v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
- v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
- v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
- v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
- break;
- case STREAMTYPE_INT:
- if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
- {
- v.x = Float4(*Pointer<Int4>(source0));
- v.y = Float4(*Pointer<Int4>(source1));
- v.z = Float4(*Pointer<Int4>(source2));
- v.w = Float4(*Pointer<Int4>(source3));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
- if(stream.normalized)
+ break;
+ case STREAMTYPE_USHORT:
+ if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
{
- if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
- if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
- if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
- if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+ v.x = Float4(*Pointer<UShort4>(source0));
+ v.y = Float4(*Pointer<UShort4>(source1));
+ v.z = Float4(*Pointer<UShort4>(source2));
+ v.w = Float4(*Pointer<UShort4>(source3));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+ if(stream.normalized)
+ {
+ if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+ if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+ if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+ if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+ }
}
- }
- else // Stream: Int, Shader attrib: Int/UInt, no type conversion
- {
- v.x = *Pointer<Float4>(source0);
- v.y = *Pointer<Float4>(source1);
- v.z = *Pointer<Float4>(source2);
- v.w = *Pointer<Float4>(source3);
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
- break;
- case STREAMTYPE_UINT:
- if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
- {
- v.x = Float4(*Pointer<UInt4>(source0));
- v.y = Float4(*Pointer<UInt4>(source1));
- v.z = Float4(*Pointer<UInt4>(source2));
- v.w = Float4(*Pointer<UInt4>(source3));
-
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
- if(stream.normalized)
+ else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
{
- if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
- if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
- if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
- if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
- }
- }
- else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
- {
- v.x = *Pointer<Float4>(source0);
- v.y = *Pointer<Float4>(source1);
- v.z = *Pointer<Float4>(source2);
- v.w = *Pointer<Float4>(source3);
+ v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
- break;
- case STREAMTYPE_HALF:
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
+ break;
+ case STREAMTYPE_INT:
+ if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
+ {
+ v.x = Float4(*Pointer<Int4>(source0));
+ v.y = Float4(*Pointer<Int4>(source1));
+ v.z = Float4(*Pointer<Int4>(source2));
+ v.w = Float4(*Pointer<Int4>(source3));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+ if(stream.normalized)
+ {
+ if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+ if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+ if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+ if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+ }
+ }
+ else // Stream: Int, Shader attrib: Int/UInt, no type conversion
+ {
+ v.x = *Pointer<Float4>(source0);
+ v.y = *Pointer<Float4>(source1);
+ v.z = *Pointer<Float4>(source2);
+ v.w = *Pointer<Float4>(source3);
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
+ break;
+ case STREAMTYPE_UINT:
+ if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
+ {
+ v.x = Float4(*Pointer<UInt4>(source0));
+ v.y = Float4(*Pointer<UInt4>(source1));
+ v.z = Float4(*Pointer<UInt4>(source2));
+ v.w = Float4(*Pointer<UInt4>(source3));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
+ if(stream.normalized)
+ {
+ if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+ if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+ if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+ if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+ }
+ }
+ else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
+ {
+ v.x = *Pointer<Float4>(source0);
+ v.y = *Pointer<Float4>(source1);
+ v.z = *Pointer<Float4>(source2);
+ v.w = *Pointer<Float4>(source3);
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
+ break;
+ case STREAMTYPE_HALF:
{
if(stream.count >= 1)
{
@@ -430,10 +430,10 @@
UShort x2 = *Pointer<UShort>(source2 + 0);
UShort x3 = *Pointer<UShort>(source3 + 0);
- v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4);
- v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4);
- v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4);
- v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4);
+ v.x.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x0) * 4);
+ v.x.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x1) * 4);
+ v.x.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x2) * 4);
+ v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4);
}
if(stream.count >= 2)
@@ -443,10 +443,10 @@
UShort y2 = *Pointer<UShort>(source2 + 2);
UShort y3 = *Pointer<UShort>(source3 + 2);
- v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4);
- v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4);
- v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4);
- v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4);
+ v.y.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y0) * 4);
+ v.y.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y1) * 4);
+ v.y.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y2) * 4);
+ v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4);
}
if(stream.count >= 3)
@@ -456,10 +456,10 @@
UShort z2 = *Pointer<UShort>(source2 + 4);
UShort z3 = *Pointer<UShort>(source3 + 4);
- v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4);
- v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4);
- v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4);
- v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4);
+ v.z.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z0) * 4);
+ v.z.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z1) * 4);
+ v.z.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z2) * 4);
+ v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4);
}
if(stream.count >= 4)
@@ -469,14 +469,14 @@
UShort w2 = *Pointer<UShort>(source2 + 6);
UShort w3 = *Pointer<UShort>(source3 + 6);
- v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4);
- v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4);
- v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4);
- v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4);
+ v.w.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w0) * 4);
+ v.w.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w1) * 4);
+ v.w.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w2) * 4);
+ v.w.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w3) * 4);
}
}
break;
- case STREAMTYPE_2_10_10_10_INT:
+ case STREAMTYPE_2_10_10_10_INT:
{
Int4 src;
src = Insert(src, *Pointer<Int>(source0), 0);
@@ -498,7 +498,7 @@
}
}
break;
- case STREAMTYPE_2_10_10_10_UINT:
+ case STREAMTYPE_2_10_10_10_UINT:
{
Int4 src;
src = Insert(src, *Pointer<Int>(source0), 0);
@@ -520,8 +520,8 @@
}
}
break;
- default:
- UNSUPPORTED("stream.type %d", int(stream.type));
+ default:
+ UNSUPPORTED("stream.type %d", int(stream.type));
}
if(stream.count < 1) v.x = Float4(0.0f);
@@ -567,17 +567,17 @@
Float4 rhw = Float4(1.0f) / w;
Vector4f proj;
- proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF))));
- proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF))));
+ proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF))));
+ proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData, HxF))));
proj.z = pos.z * rhw;
proj.w = rhw;
transpose4x4(pos.x, pos.y, pos.z, pos.w);
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,position), 16) = pos.w;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,position), 16) = pos.z;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,position), 16) = pos.y;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,position), 16) = pos.x;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, position), 16) = pos.w;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, position), 16) = pos.z;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, position), 16) = pos.y;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, position), 16) = pos.x;
it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
if(it != spirvShader->outputBuiltins.end())
@@ -585,10 +585,10 @@
ASSERT(it->second.SizeInComponents == 1);
auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, 2);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, 1);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, pointSize)) = Extract(psize, 3);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, pointSize)) = Extract(psize, 2);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, pointSize)) = Extract(psize, 1);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, pointSize)) = Extract(psize, 0);
}
it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
@@ -598,10 +598,10 @@
for(unsigned int i = 0; i < count; i++)
{
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 3);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 2);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 1);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 0);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 3);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 2);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 1);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 0);
}
}
@@ -612,29 +612,29 @@
for(unsigned int i = 0; i < count; i++)
{
auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 3);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 2);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 1);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 0);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 3);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 2);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 1);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 0);
}
}
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8) & 0x0000000FF;
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0) & 0x0000000FF;
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 8) & 0x0000000FF;
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 0) & 0x0000000FF;
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullMask)) = -((cullMask >> 3) & 1);
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullMask)) = -((cullMask >> 2) & 1);
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullMask)) = -((cullMask >> 1) & 1);
- *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullMask)) = -((cullMask >> 0) & 1);
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullMask)) = -((cullMask >> 3) & 1);
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullMask)) = -((cullMask >> 2) & 1);
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullMask)) = -((cullMask >> 1) & 1);
+ *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullMask)) = -((cullMask >> 0) & 1);
transpose4x4(proj.x, proj.y, proj.z, proj.w);
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,projected), 16) = proj.z;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,projected), 16) = proj.y;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,projected), 16) = proj.x;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, projected), 16) = proj.w;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, projected), 16) = proj.z;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, projected), 16) = proj.y;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, projected), 16) = proj.x;
for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
{
@@ -651,22 +651,22 @@
transpose4x4(v.x, v.y, v.z, v.w);
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,v[i]), 16) = v.w;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,v[i]), 16) = v.z;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,v[i]), 16) = v.y;
- *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,v[i]), 16) = v.x;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, v[i]), 16) = v.w;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, v[i]), 16) = v.z;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, v[i]), 16) = v.y;
+ *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, v[i]), 16) = v.x;
}
}
}
void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
{
- *Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position));
- *Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize));
+ *Pointer<Int4>(vertex + OFFSET(Vertex, position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, position));
+ *Pointer<Int>(vertex + OFFSET(Vertex, pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, pointSize));
- *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags));
- *Pointer<Int>(vertex + OFFSET(Vertex,cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,cullMask));
- *Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected));
+ *Pointer<Int>(vertex + OFFSET(Vertex, clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, clipFlags));
+ *Pointer<Int>(vertex + OFFSET(Vertex, cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, cullMask));
+ *Pointer<Int4>(vertex + OFFSET(Vertex, projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, projected));
for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
{
diff --git a/src/Pipeline/VertexRoutine.hpp b/src/Pipeline/VertexRoutine.hpp
index ab458b4..c490cd7 100644
--- a/src/Pipeline/VertexRoutine.hpp
+++ b/src/Pipeline/VertexRoutine.hpp
@@ -20,14 +20,21 @@
#include "Device/Color.hpp"
#include "Device/VertexProcessor.hpp"
-namespace vk { class PipelineLayout; }
+namespace vk {
+class PipelineLayout;
+}
namespace sw {
class VertexRoutinePrototype : public VertexRoutineFunction
{
public:
- VertexRoutinePrototype() : vertex(Arg<0>()), batch(Arg<1>()), task(Arg<2>()), data(Arg<3>()) {}
+ VertexRoutinePrototype()
+ : vertex(Arg<0>())
+ , batch(Arg<1>())
+ , task(Arg<2>())
+ , data(Arg<3>())
+ {}
virtual ~VertexRoutinePrototype() {}
protected:
@@ -41,9 +48,9 @@
{
public:
VertexRoutine(
- const VertexProcessor::State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *spirvShader);
+ const VertexProcessor::State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *spirvShader);
virtual ~VertexRoutine();
void generate();
@@ -57,15 +64,15 @@
SpirvRoutine routine;
const VertexProcessor::State &state;
- SpirvShader const * const spirvShader;
+ SpirvShader const *const spirvShader;
private:
- virtual void program(Pointer<UInt> &batch, UInt& vertexCount) = 0;
+ virtual void program(Pointer<UInt> &batch, UInt &vertexCount) = 0;
typedef VertexProcessor::State::Input Stream;
Vector4f readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
- bool robustBufferAccess, UInt& robustnessSize, Int baseVertex);
+ bool robustBufferAccess, UInt &robustnessSize, Int baseVertex);
void readInput(Pointer<UInt> &batch);
void computeClipFlags();
void computeCullMask();
@@ -75,4 +82,4 @@
} // namespace sw
-#endif // sw_VertexRoutine_hpp
+#endif // sw_VertexRoutine_hpp