clang-format the src/Pipeline directory Bug: b/144825072 Change-Id: I869aef91d6318bf6955581e5dad762800bd46296 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39655 Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp index 1d6d0a3..4ee8375 100644 --- a/src/Pipeline/ComputeProgram.cpp +++ b/src/Pipeline/ComputeProgram.cpp
@@ -26,16 +26,21 @@ namespace { -enum { X, Y, Z }; +enum +{ + X, + Y, + Z +}; } // anonymous namespace namespace sw { ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets) - : shader(shader), - pipelineLayout(pipelineLayout), - descriptorSets(descriptorSets) + : shader(shader) + , pipelineLayout(pipelineLayout) + , descriptorSets(descriptorSets) { } @@ -53,46 +58,41 @@ shader->emitEpilog(&routine); } -void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3]) +void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3]) { - routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { auto numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups)); for(uint32_t component = 0; component < builtin.SizeInComponents; component++) { value[builtin.FirstComponent + component] = - As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component))); + As<SIMD::Float>(SIMD::Int(Extract(numWorkgroups, component))); } }); - routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { for(uint32_t component = 0; component < builtin.SizeInComponents; component++) { value[builtin.FirstComponent + component] = - As<SIMD::Float>(SIMD::Int(workgroupID[component])); + As<SIMD::Float>(SIMD::Int(workgroupID[component])); } }); - routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + 
routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { auto workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize)); for(uint32_t component = 0; component < builtin.SizeInComponents; component++) { value[builtin.FirstComponent + component] = - As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component))); + As<SIMD::Float>(SIMD::Int(Extract(workgroupSize, component))); } }); - routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); auto subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup)); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupsPerWorkgroup)); }); - routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); auto invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup)); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(invocationsPerSubgroup)); @@ -101,7 +101,7 @@ routine->setImmutableInputBuiltins(shader); } -void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex) +void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex) { Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups)); Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize)); @@ -114,35 +114,31 
@@ { SIMD::Int idx = localInvocationIndex; localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY); - idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo + idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX); - idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo + idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo localInvocationID[X] = idx; } - routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex); }); - routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupIndex)); }); - routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { for(uint32_t component = 0; component < builtin.SizeInComponents; component++) { value[builtin.FirstComponent + component] = - As<SIMD::Float>(localInvocationID[component]); + As<SIMD::Float>(localInvocationID[component]); } }); - routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + 
routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { SIMD::Int wgID = 0; wgID = Insert(wgID, workgroupID[X], X); wgID = Insert(wgID, workgroupID[Y], Y); @@ -156,7 +152,7 @@ }); } -void ComputeProgram::emit(SpirvRoutine* routine) +void ComputeProgram::emit(SpirvRoutine *routine) { Pointer<Byte> data = Arg<0>(); Int workgroupX = Arg<1>(); @@ -174,7 +170,7 @@ Int invocationsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerWorkgroup)); - Int workgroupID[3] = {workgroupX, workgroupY, workgroupZ}; + Int workgroupID[3] = { workgroupX, workgroupY, workgroupZ }; setWorkgroupBuiltins(data, routine, workgroupID); For(Int i = 0, i < subgroupCount, i++) @@ -194,11 +190,11 @@ } void ComputeProgram::run( - vk::DescriptorSet::Bindings const &descriptorSets, - vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets, - PushConstantStorage const &pushConstants, - uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, - uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) + vk::DescriptorSet::Bindings const &descriptorSets, + vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets, + PushConstantStorage const &pushConstants, + uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, + uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { auto &modes = shader->getModes(); @@ -231,8 +227,7 @@ for(uint32_t batchID = 0; batchID < batchCount && batchID < groupCount; batchID++) { wg.add(1); - marl::schedule([=, &data] - { + marl::schedule([=, &data] { defer(wg.done()); std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp index 75f0cf4..9b93f4e 100644 --- a/src/Pipeline/ComputeProgram.hpp +++ b/src/Pipeline/ComputeProgram.hpp
@@ -17,13 +17,15 @@ #include "SpirvShader.hpp" -#include "Reactor/Coroutine.hpp" #include "Device/Context.hpp" +#include "Reactor/Coroutine.hpp" #include "Vulkan/VkDescriptorSet.hpp" #include <functional> -namespace vk { class PipelineLayout; } +namespace vk { +class PipelineLayout; +} namespace sw { @@ -34,13 +36,13 @@ // ComputeProgram builds a SPIR-V compute shader. class ComputeProgram : public Coroutine<SpirvShader::YieldResult( - void* data, - int32_t workgroupX, - int32_t workgroupY, - int32_t workgroupZ, - void* workgroupMemory, - int32_t firstSubgroup, - int32_t subgroupCount)> + void *data, + int32_t workgroupX, + int32_t workgroupY, + int32_t workgroupZ, + void *workgroupMemory, + int32_t firstSubgroup, + int32_t subgroupCount)> { public: ComputeProgram(SpirvShader const *spirvShader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets); @@ -52,35 +54,35 @@ // run executes the compute shader routine for all workgroups. void run( - vk::DescriptorSet::Bindings const &descriptorSetBindings, - vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets, - PushConstantStorage const &pushConstants, - uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, - uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); + vk::DescriptorSet::Bindings const &descriptorSetBindings, + vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets, + PushConstantStorage const &pushConstants, + uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, + uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); protected: - void emit(SpirvRoutine* routine); - void setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3]); - void setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex); + void emit(SpirvRoutine *routine); + void setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int 
workgroupID[3]); + void setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex); struct Data { vk::DescriptorSet::Bindings descriptorSets; vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets; - uint4 numWorkgroups; // [x, y, z, 0] - uint4 workgroupSize; // [x, y, z, 0] - uint32_t invocationsPerSubgroup; // SPIR-V: "SubgroupSize" - uint32_t subgroupsPerWorkgroup; // SPIR-V: "NumSubgroups" - uint32_t invocationsPerWorkgroup; // Total number of invocations per workgroup. + uint4 numWorkgroups; // [x, y, z, 0] + uint4 workgroupSize; // [x, y, z, 0] + uint32_t invocationsPerSubgroup; // SPIR-V: "SubgroupSize" + uint32_t subgroupsPerWorkgroup; // SPIR-V: "NumSubgroups" + uint32_t invocationsPerWorkgroup; // Total number of invocations per workgroup. PushConstantStorage pushConstants; const Constants *constants; }; - SpirvShader const * const shader; - vk::PipelineLayout const * const pipelineLayout; + SpirvShader const *const shader; + vk::PipelineLayout const *const pipelineLayout; const vk::DescriptorSet::Bindings &descriptorSets; }; } // namespace sw -#endif // sw_ComputeProgram_hpp +#endif // sw_ComputeProgram_hpp
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp index 6eaf92d..1f98769 100644 --- a/src/Pipeline/Constants.cpp +++ b/src/Pipeline/Constants.cpp
@@ -14,8 +14,8 @@ #include "Constants.hpp" -#include "System/Math.hpp" #include "System/Half.hpp" +#include "System/Math.hpp" #include <cstring> @@ -25,8 +25,7 @@ Constants::Constants() { - static const unsigned int transposeBit0[16] = - { + static const unsigned int transposeBit0[16] = { 0x00000000, 0x00000001, 0x00000010, @@ -45,8 +44,7 @@ 0x00001111 }; - static const unsigned int transposeBit1[16] = - { + static const unsigned int transposeBit1[16] = { 0x00000000, 0x00000002, 0x00000020, @@ -65,8 +63,7 @@ 0x00002222 }; - static const unsigned int transposeBit2[16] = - { + static const unsigned int transposeBit2[16] = { 0x00000000, 0x00000004, 0x00000040, @@ -89,74 +86,71 @@ memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1)); memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2)); - static const ushort4 cWeight[17] = - { - {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF - {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF - {0x8000, 0x8000, 0x8000, 0x8000}, // 0xFFFF / 2 = 0x8000 - {0x5555, 0x5555, 0x5555, 0x5555}, // 0xFFFF / 3 = 0x5555 - {0x4000, 0x4000, 0x4000, 0x4000}, // 0xFFFF / 4 = 0x4000 - {0x3333, 0x3333, 0x3333, 0x3333}, // 0xFFFF / 5 = 0x3333 - {0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA}, // 0xFFFF / 6 = 0x2AAA - {0x2492, 0x2492, 0x2492, 0x2492}, // 0xFFFF / 7 = 0x2492 - {0x2000, 0x2000, 0x2000, 0x2000}, // 0xFFFF / 8 = 0x2000 - {0x1C71, 0x1C71, 0x1C71, 0x1C71}, // 0xFFFF / 9 = 0x1C71 - {0x1999, 0x1999, 0x1999, 0x1999}, // 0xFFFF / 10 = 0x1999 - {0x1745, 0x1745, 0x1745, 0x1745}, // 0xFFFF / 11 = 0x1745 - {0x1555, 0x1555, 0x1555, 0x1555}, // 0xFFFF / 12 = 0x1555 - {0x13B1, 0x13B1, 0x13B1, 0x13B1}, // 0xFFFF / 13 = 0x13B1 - {0x1249, 0x1249, 0x1249, 0x1249}, // 0xFFFF / 14 = 0x1249 - {0x1111, 0x1111, 0x1111, 0x1111}, // 0xFFFF / 15 = 0x1111 - {0x1000, 0x1000, 0x1000, 0x1000}, // 0xFFFF / 16 = 0x1000 + static const ushort4 cWeight[17] = { + { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, // 0xFFFF / 1 = 0xFFFF + { 0xFFFF, 0xFFFF, 
0xFFFF, 0xFFFF }, // 0xFFFF / 1 = 0xFFFF + { 0x8000, 0x8000, 0x8000, 0x8000 }, // 0xFFFF / 2 = 0x8000 + { 0x5555, 0x5555, 0x5555, 0x5555 }, // 0xFFFF / 3 = 0x5555 + { 0x4000, 0x4000, 0x4000, 0x4000 }, // 0xFFFF / 4 = 0x4000 + { 0x3333, 0x3333, 0x3333, 0x3333 }, // 0xFFFF / 5 = 0x3333 + { 0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA }, // 0xFFFF / 6 = 0x2AAA + { 0x2492, 0x2492, 0x2492, 0x2492 }, // 0xFFFF / 7 = 0x2492 + { 0x2000, 0x2000, 0x2000, 0x2000 }, // 0xFFFF / 8 = 0x2000 + { 0x1C71, 0x1C71, 0x1C71, 0x1C71 }, // 0xFFFF / 9 = 0x1C71 + { 0x1999, 0x1999, 0x1999, 0x1999 }, // 0xFFFF / 10 = 0x1999 + { 0x1745, 0x1745, 0x1745, 0x1745 }, // 0xFFFF / 11 = 0x1745 + { 0x1555, 0x1555, 0x1555, 0x1555 }, // 0xFFFF / 12 = 0x1555 + { 0x13B1, 0x13B1, 0x13B1, 0x13B1 }, // 0xFFFF / 13 = 0x13B1 + { 0x1249, 0x1249, 0x1249, 0x1249 }, // 0xFFFF / 14 = 0x1249 + { 0x1111, 0x1111, 0x1111, 0x1111 }, // 0xFFFF / 15 = 0x1111 + { 0x1000, 0x1000, 0x1000, 0x1000 }, // 0xFFFF / 16 = 0x1000 }; - static const float4 uvWeight[17] = - { - {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f}, - {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f}, - {1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f}, - {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f}, - {1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f}, - {1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f}, - {1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f}, - {1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f}, - {1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f}, - {1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f}, - {1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f}, - {1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f}, - {1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f}, - {1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f}, - {1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f}, - {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, - {1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f}, + static 
const float4 uvWeight[17] = { + { 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f }, + { 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f }, + { 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f }, + { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f }, + { 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f }, + { 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f }, + { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f }, + { 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f }, + { 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f }, + { 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f }, + { 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f }, + { 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f }, + { 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f }, + { 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f }, + { 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f }, + { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, + { 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f }, }; - static const float4 uvStart[17] = - { - {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f}, - {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f}, - {-1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f}, - {-2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f}, - {-3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f}, - {-4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f}, - {-5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f}, - {-6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f}, - {-7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f}, - {-8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f}, - {-9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f}, - {-10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f}, - {-11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f}, - {-12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f}, - {-13.0f / 28.0f, 
-13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f}, - {-14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f}, - {-15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f}, + static const float4 uvStart[17] = { + { -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f }, + { -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f }, + { -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f }, + { -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f }, + { -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f }, + { -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f }, + { -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f }, + { -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f }, + { -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f }, + { -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f }, + { -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f }, + { -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f }, + { -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f }, + { -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f }, + { -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f }, + { -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f }, + { -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f }, }; memcpy(&this->cWeight, cWeight, sizeof(cWeight)); memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight)); memcpy(&this->uvStart, uvStart, sizeof(uvStart)); - static const unsigned int occlusionCount[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; + static const unsigned int occlusionCount[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount)); @@ -253,24 +247,24 @@ for(int i = 0; i < 4; i++) { - maskW01Q[i][0] = -(i >> 0 & 1); - maskW01Q[i][1] = -(i >> 1 & 1); - maskW01Q[i][2] = -(i >> 0 & 1); - maskW01Q[i][3] = -(i >> 1 & 1); + maskW01Q[i][0] = -(i >> 0 & 1); + 
maskW01Q[i][1] = -(i >> 1 & 1); + maskW01Q[i][2] = -(i >> 0 & 1); + maskW01Q[i][3] = -(i >> 1 & 1); - maskD01X[i][0] = -(i >> 0 & 1); - maskD01X[i][1] = -(i >> 1 & 1); - maskD01X[i][2] = -(i >> 0 & 1); - maskD01X[i][3] = -(i >> 1 & 1); + maskD01X[i][0] = -(i >> 0 & 1); + maskD01X[i][1] = -(i >> 1 & 1); + maskD01X[i][2] = -(i >> 0 & 1); + maskD01X[i][3] = -(i >> 1 & 1); } for(int i = 0; i < 16; i++) { mask10Q[i][0] = mask10Q[i][1] = - (i & 0x1 ? 0x3FF : 0) | - (i & 0x2 ? 0xFFC00 : 0) | - (i & 0x4 ? 0x3FF00000 : 0) | - (i & 0x8 ? 0xC0000000 : 0); + (i & 0x1 ? 0x3FF : 0) | + (i & 0x2 ? 0xFFC00 : 0) | + (i & 0x4 ? 0x3FF00000 : 0) | + (i & 0x8 ? 0xC0000000 : 0); } for(int i = 0; i < 256; i++) @@ -327,13 +321,13 @@ memcpy(&this->X, &X, sizeof(X)); memcpy(&this->Y, &Y, sizeof(Y)); - const dword maxX[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101}; - const dword maxY[16] = {0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202}; - const dword maxZ[16] = {0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404}; - const dword minX[16] = {0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808}; - const dword minY[16] = {0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010}; - const dword minZ[16] = {0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 
0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020}; - const dword fini[16] = {0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080}; + const dword maxX[16] = { 0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101 }; + const dword maxY[16] = { 0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202 }; + const dword maxZ[16] = { 0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404 }; + const dword minX[16] = { 0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808 }; + const dword minY[16] = { 0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010 }; + const dword minZ[16] = { 0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020 }; + const dword fini[16] = { 0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080 }; memcpy(&this->maxX, &maxX, sizeof(maxX)); memcpy(&this->maxY, &maxY, sizeof(maxY)); @@ -343,17 +337,17 @@ 
memcpy(&this->minZ, &minZ, sizeof(minZ)); memcpy(&this->fini, &fini, sizeof(fini)); - static const dword4 maxPos = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE}; + static const dword4 maxPos = { 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE }; memcpy(&this->maxPos, &maxPos, sizeof(maxPos)); - static const float4 unscaleByte = {1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF}; - static const float4 unscaleSByte = {1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F}; - static const float4 unscaleShort = {1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF}; - static const float4 unscaleUShort = {1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF}; - static const float4 unscaleInt = {1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF}; - static const float4 unscaleUInt = {1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF}; - static const float4 unscaleFixed = {1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000}; + static const float4 unscaleByte = { 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF }; + static const float4 unscaleSByte = { 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F }; + static const float4 unscaleShort = { 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF }; + static const float4 unscaleUShort = { 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF }; + static const float4 unscaleInt = { 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF }; + static const float4 unscaleUInt = { 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF }; + static const float4 unscaleFixed = { 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000 }; memcpy(&this->unscaleByte, &unscaleByte, sizeof(unscaleByte)); memcpy(&this->unscaleSByte, &unscaleSByte, sizeof(unscaleSByte)); @@ -365,7 +359,7 @@ for(int i = 0; i <= 0xFFFF; i++) { - half2float[i] = (float)reinterpret_cast<half&>(i); + 
half2float[i] = (float)reinterpret_cast<half &>(i); } }
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp index 484682b..c9c8a1f 100644 --- a/src/Pipeline/Constants.hpp +++ b/src/Pipeline/Constants.hpp
@@ -15,8 +15,8 @@ #ifndef sw_Constants_hpp #define sw_Constants_hpp -#include "System/Types.hpp" #include "System/Math.hpp" +#include "System/Types.hpp" #include "Vulkan/VkConfig.h" namespace sw { @@ -68,8 +68,8 @@ word4 maskW01Q[4]; dword4 maskD01X[4]; word4 mask565Q[8]; - dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x - word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x + dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x + word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x unsigned short sRGBtoLinear8_16[256]; @@ -91,10 +91,10 @@ // VK_SAMPLE_COUNT_4_BIT // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling static constexpr float VkSampleLocations4[][2] = { - {0.375, 0.125}, - {0.875, 0.375}, - {0.125, 0.625}, - {0.625, 0.875}, + { 0.375, 0.125 }, + { 0.875, 0.375 }, + { 0.125, 0.625 }, + { 0.625, 0.875 }, }; // Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down. @@ -141,6 +141,6 @@ extern Constants constants; -} // namepsace sw +} // namespace sw -#endif // sw_Constants_hpp +#endif // sw_Constants_hpp
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp index 332f82e..21296ed 100644 --- a/src/Pipeline/PixelProgram.cpp +++ b/src/Pipeline/PixelProgram.cpp
@@ -56,48 +56,43 @@ return mask; } -void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) +void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]) { routine.setImmutableInputBuiltins(spirvShader); - routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { assert(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID))))); }); - routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { assert(builtin.SizeInComponents == 4); - value[builtin.FirstComponent+0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f); - value[builtin.FirstComponent+1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f); - value[builtin.FirstComponent+2] = z[0]; // sample 0 - value[builtin.FirstComponent+3] = w; + value[builtin.FirstComponent + 0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f); + value[builtin.FirstComponent + 1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f); + value[builtin.FirstComponent + 2] = z[0]; // sample 0 + value[builtin.FirstComponent + 3] = w; }); - routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { assert(builtin.SizeInComponents == 2); - value[builtin.FirstComponent+0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) + - 
SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)))); - value[builtin.FirstComponent+1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) + - SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)))); + value[builtin.FirstComponent + 0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) + + SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)))); + value[builtin.FirstComponent + 1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) + + SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)))); }); - routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { assert(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width)); }); - routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { assert(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask)); }); - routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1); - routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1); + routine.windowSpacePosition[0] = x + SIMD::Int(0, 1, 0, 1); + routine.windowSpacePosition[1] = y + SIMD::Int(0, 0, 1, 1); routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID)); } @@ -169,7 +164,7 @@ for(auto i = 0u; i < state.multiSample; i++) { - cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1<<i), SIMD::Int(0))); + cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << i), SIMD::Int(0))); } } @@ -211,78 +206,78 @@ auto format = state.targetFormat[index]; 
switch(format) { - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16B16A16_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_A8B8G8R8_SRGB_PACK32: - case VK_FORMAT_A2B10G10R10_UNORM_PACK32: - for(unsigned int q = 0; q < state.multiSample; q++) - { - if(state.multiSampleMask & (1 << q)) + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + for(unsigned int q = 0; q < state.multiSample; q++) { - Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); - Vector4s color; + if(state.multiSampleMask & (1 << q)) + { + Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); + Vector4s color; - color.x = convertFixed16(c[index].x, false); - color.y = convertFixed16(c[index].y, false); - color.z = convertFixed16(c[index].z, false); - color.w = convertFixed16(c[index].w, false); + color.x = convertFixed16(c[index].x, false); + color.y = convertFixed16(c[index].y, false); + color.z = convertFixed16(c[index].z, false); + color.w = convertFixed16(c[index].w, false); - alphaBlend(index, buffer, color, x); - writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); + alphaBlend(index, buffer, color, x); + writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); + } } - } - break; - case 
VK_FORMAT_R16_SFLOAT: - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_R16G16B16A16_SFLOAT: - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R32G32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R32G32_UINT: - case VK_FORMAT_R32G32B32A32_UINT: - case VK_FORMAT_R16_SINT: - case VK_FORMAT_R16G16_SINT: - case VK_FORMAT_R16G16B16A16_SINT: - case VK_FORMAT_R16_UINT: - case VK_FORMAT_R16G16_UINT: - case VK_FORMAT_R16G16B16A16_UINT: - case VK_FORMAT_R8_SINT: - case VK_FORMAT_R8G8_SINT: - case VK_FORMAT_R8G8B8A8_SINT: - case VK_FORMAT_R8_UINT: - case VK_FORMAT_R8G8_UINT: - case VK_FORMAT_R8G8B8A8_UINT: - case VK_FORMAT_A8B8G8R8_UINT_PACK32: - case VK_FORMAT_A8B8G8R8_SINT_PACK32: - case VK_FORMAT_A2B10G10R10_UINT_PACK32: - for(unsigned int q = 0; q < state.multiSample; q++) - { - if(state.multiSampleMask & (1 << q)) + break; + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16A16_SFLOAT: + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R32G32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32G32_UINT: + case VK_FORMAT_R32G32B32A32_UINT: + case VK_FORMAT_R16_SINT: + case VK_FORMAT_R16G16_SINT: + case VK_FORMAT_R16G16B16A16_SINT: + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16G16_UINT: + case VK_FORMAT_R16G16B16A16_UINT: + case VK_FORMAT_R8_SINT: + case VK_FORMAT_R8G8_SINT: + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_R8_UINT: + case VK_FORMAT_R8G8_UINT: + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_A8B8G8R8_UINT_PACK32: + case VK_FORMAT_A8B8G8R8_SINT_PACK32: + case VK_FORMAT_A2B10G10R10_UINT_PACK32: + for(unsigned int q = 0; q < state.multiSample; q++) { - Pointer<Byte> buffer = 
cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); - Vector4f color = c[index]; + if(state.multiSampleMask & (1 << q)) + { + Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); + Vector4f color = c[index]; - alphaBlend(index, buffer, color, x); - writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); + alphaBlend(index, buffer, color, x); + writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); + } } - } - break; - default: - UNIMPLEMENTED("VkFormat: %d", int(format)); + break; + default: + UNIMPLEMENTED("VkFormat: %d", int(format)); } } } @@ -298,62 +293,66 @@ switch(state.targetFormat[index]) { - case VK_FORMAT_UNDEFINED: - break; - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16B16A16_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_A8B8G8R8_SRGB_PACK32: - case VK_FORMAT_A2B10G10R10_UNORM_PACK32: - oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); - oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); - oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); - oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); - break; - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R32G32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R32G32_UINT: - case VK_FORMAT_R32G32B32A32_UINT: - case VK_FORMAT_R16_SFLOAT: - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_R16G16B16A16_SFLOAT: - case 
VK_FORMAT_B10G11R11_UFLOAT_PACK32: - case VK_FORMAT_R16_SINT: - case VK_FORMAT_R16G16_SINT: - case VK_FORMAT_R16G16B16A16_SINT: - case VK_FORMAT_R16_UINT: - case VK_FORMAT_R16G16_UINT: - case VK_FORMAT_R16G16B16A16_UINT: - case VK_FORMAT_R8_SINT: - case VK_FORMAT_R8G8_SINT: - case VK_FORMAT_R8G8B8A8_SINT: - case VK_FORMAT_R8_UINT: - case VK_FORMAT_R8G8_UINT: - case VK_FORMAT_R8G8B8A8_UINT: - case VK_FORMAT_A8B8G8R8_UINT_PACK32: - case VK_FORMAT_A8B8G8R8_SINT_PACK32: - case VK_FORMAT_A2B10G10R10_UINT_PACK32: - break; - default: - UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); + case VK_FORMAT_UNDEFINED: + break; + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + oC[index].x = Max(oC[index].x, Float4(0.0f)); + oC[index].x = Min(oC[index].x, Float4(1.0f)); + oC[index].y = Max(oC[index].y, Float4(0.0f)); + oC[index].y = Min(oC[index].y, Float4(1.0f)); + oC[index].z = Max(oC[index].z, Float4(0.0f)); + oC[index].z = Min(oC[index].z, Float4(1.0f)); + oC[index].w = Max(oC[index].w, Float4(0.0f)); + oC[index].w = Min(oC[index].w, Float4(1.0f)); + break; + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R32G32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32G32_UINT: + case VK_FORMAT_R32G32B32A32_UINT: + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16A16_SFLOAT: + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + case VK_FORMAT_R16_SINT: + case VK_FORMAT_R16G16_SINT: + case 
VK_FORMAT_R16G16B16A16_SINT: + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R16G16_UINT: + case VK_FORMAT_R16G16B16A16_UINT: + case VK_FORMAT_R8_SINT: + case VK_FORMAT_R8G8_SINT: + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_R8_UINT: + case VK_FORMAT_R8G8_UINT: + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_A8B8G8R8_UINT_PACK32: + case VK_FORMAT_A8B8G8R8_SINT_PACK32: + case VK_FORMAT_A2B10G10R10_UINT_PACK32: + break; + default: + UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); } } } -Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) +Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) { Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); @@ -361,4 +360,4 @@ return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); } -} // namepsace sw +} // namespace sw
diff --git a/src/Pipeline/PixelProgram.hpp b/src/Pipeline/PixelProgram.hpp index 7888115..59994fb 100644 --- a/src/Pipeline/PixelProgram.hpp +++ b/src/Pipeline/PixelProgram.hpp
@@ -23,18 +23,18 @@ { public: PixelProgram( - const PixelProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader, - const vk::DescriptorSet::Bindings &descriptorSets) : - PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets) + const PixelProcessor::State &state, + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets) + : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets) { } virtual ~PixelProgram() {} protected: - virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]); + virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]); virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]); virtual Bool alphaTest(Int cMask[4]); virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp index 6b06146..b2ae9d0 100644 --- a/src/Pipeline/PixelRoutine.cpp +++ b/src/Pipeline/PixelRoutine.cpp
@@ -14,24 +14,24 @@ #include "PixelRoutine.hpp" -#include "SamplerCore.hpp" #include "Constants.hpp" -#include "Device/Renderer.hpp" -#include "Device/QuadRasterizer.hpp" +#include "SamplerCore.hpp" #include "Device/Primitive.hpp" +#include "Device/QuadRasterizer.hpp" +#include "Device/Renderer.hpp" #include "Vulkan/VkDebug.hpp" #include "Vulkan/VkPipelineLayout.hpp" namespace sw { PixelRoutine::PixelRoutine( - const PixelProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader, - const vk::DescriptorSet::Bindings &descriptorSets) - : QuadRasterizer(state, spirvShader), - routine(pipelineLayout), - descriptorSets(descriptorSets) + const PixelProcessor::State &state, + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets) + : QuadRasterizer(state, spirvShader) + , routine(pipelineLayout) + , descriptorSets(descriptorSets) { if(spirvShader) { @@ -55,8 +55,8 @@ // TODO: consider shader which modifies sample mask in general const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage); - Int zMask[4]; // Depth mask - Int sMask[4]; // Stencil mask + Int zMask[4]; // Depth mask + Int sMask[4]; // Stencil mask for(unsigned int q = 0; q < state.multiSample; q++) { @@ -72,7 +72,7 @@ Float4 f; Float4 rhwCentroid; - Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); + Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16); if(interpolateZ()) { @@ -82,10 +82,10 @@ if(state.multiSample > 1) { - x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4)); + x -= *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4)); } - z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp); + z[q] = interpolate(x, Dz[q], z[q], 
primitive + OFFSET(Primitive, z), false, false, state.depthClamp); } } @@ -101,7 +101,7 @@ If(depthPass || Bool(!earlyDepthTest)) { - Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); + Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16); // Centroid locations Float4 XXXX = Float4(0.0f); @@ -113,9 +113,9 @@ for(unsigned int q = 0; q < state.multiSample; q++) { - XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); - YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); - WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]); + XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]); + YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]); + WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]); } WWWW = Rcp_pp(WWWW); @@ -128,12 +128,12 @@ if(interpolateW()) { - w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false); + w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false, false); rhw = reciprocal(w, false, false, true); if(state.centroid) { - rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); + rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false)); } } @@ -147,16 +147,16 @@ if(input.Centroid && state.multiSample > 1) { routine.inputs[interpolant] = - interpolateCentroid(XXXX, YYYY, rhwCentroid, - primitive + OFFSET(Primitive, V[interpolant]), - input.Flat, !input.NoPerspective); + interpolateCentroid(XXXX, YYYY, rhwCentroid, + primitive + OFFSET(Primitive, V[interpolant]), + input.Flat, !input.NoPerspective); } else { routine.inputs[interpolant] = - interpolate(xxxx, Dv[interpolant], rhw, - primitive + OFFSET(Primitive, 
V[interpolant]), - input.Flat, !input.NoPerspective, false); + interpolate(xxxx, Dv[interpolant], rhw, + primitive + OFFSET(Primitive, V[interpolant]), + input.Flat, !input.NoPerspective, false); } } } @@ -166,8 +166,8 @@ for(uint32_t i = 0; i < state.numClipDistances; i++) { auto distance = interpolate(xxxx, DclipDistance[i], rhw, - primitive + OFFSET(Primitive, clipDistance[i]), - false, true, false); + primitive + OFFSET(Primitive, clipDistance[i]), + false, true, false); auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0))); for(auto ms = 0u; ms < state.multiSample; ms++) @@ -202,9 +202,9 @@ if(i < it->second.SizeInComponents) { routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = - interpolate(xxxx, DcullDistance[i], rhw, - primitive + OFFSET(Primitive, cullDistance[i]), - false, true, false); + interpolate(xxxx, DcullDistance[i], rhw, + primitive + OFFSET(Primitive, cullDistance[i]), + false, true, false); } } } @@ -250,7 +250,7 @@ if(state.occlusionEnabled) { - occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); + occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q])); } } } @@ -271,12 +271,12 @@ Float4 PixelRoutine::interpolateCentroid(const Float4 &x, const Float4 &y, const Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective) { - Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16); + Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, C), 16); if(!flat) { - interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) + - y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16); + interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16) + + y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, B), 16); if(perspective) { @@ -300,7 +300,7 @@ if(q > 0) { - buffer += q * *Pointer<Int>(data + 
OFFSET(DrawData,stencilSliceB)); + buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB)); } Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB)); @@ -310,20 +310,20 @@ if(state.frontStencil.compareMask != 0xff) { - value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ)); + value &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].testMaskQ)); } stencilTest(value, state.frontStencil.compareOp, false); if(state.backStencil.compareMask != 0xff) { - valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ)); + valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].testMaskQ)); } stencilTest(valueBack, state.backStencil.compareOp, true); - value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); - valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); + value &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)); + valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)); value |= valueBack; sMask = SignMask(value) & cMask; @@ -335,43 +335,43 @@ switch(stencilCompareMode) { - case VK_COMPARE_OP_ALWAYS: - value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - break; - case VK_COMPARE_OP_NEVER: - value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - break; - case VK_COMPARE_OP_LESS: // a < b ~ b > a - value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ))); - break; - case VK_COMPARE_OP_EQUAL: - value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ))); - break; - case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b) - value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ))); - value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - break; - case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b) - 
equal = value; - equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ))); - value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ))); - value |= equal; - break; - case VK_COMPARE_OP_GREATER: // a > b - equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)); - value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - equal = CmpGT(As<SByte8>(equal), As<SByte8>(value)); - value = equal; - break; - case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a) - value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ))); - value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - break; - default: - UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode)); + case VK_COMPARE_OP_ALWAYS: + value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + break; + case VK_COMPARE_OP_NEVER: + value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + break; + case VK_COMPARE_OP_LESS: // a < b ~ b > a + value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); + value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ))); + break; + case VK_COMPARE_OP_EQUAL: + value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ))); + break; + case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b) + value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ))); + value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + break; + case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b) + equal = value; + equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ))); + 
value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); + value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ))); + value |= equal; + break; + case VK_COMPARE_OP_GREATER: // a > b + equal = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)); + value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); + equal = CmpGT(As<SByte8>(equal), As<SByte8>(value)); + value = equal; + break; + case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a) + value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); + value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ))); + value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + break; + default: + UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode)); } } @@ -385,11 +385,11 @@ } Pointer<Byte> buffer = zBuffer + 4 * x; - Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); + Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB)); if(q > 0) { - buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); + buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB)); } Float4 zValue; @@ -405,45 +405,45 @@ switch(state.depthCompareMode) { - case VK_COMPARE_OP_ALWAYS: - // Optimized - break; - case VK_COMPARE_OP_NEVER: - // Optimized - break; - case VK_COMPARE_OP_EQUAL: - zTest = CmpEQ(zValue, Z); - break; - case VK_COMPARE_OP_NOT_EQUAL: - zTest = CmpNEQ(zValue, Z); - break; - case VK_COMPARE_OP_LESS: - zTest = CmpNLE(zValue, Z); - break; - case VK_COMPARE_OP_GREATER_OR_EQUAL: - zTest = CmpLE(zValue, Z); - break; - case VK_COMPARE_OP_LESS_OR_EQUAL: - zTest = CmpNLT(zValue, Z); - break; - case VK_COMPARE_OP_GREATER: - zTest = CmpLT(zValue, Z); - break; - default: - UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode)); + case VK_COMPARE_OP_ALWAYS: + // Optimized + break; + case VK_COMPARE_OP_NEVER: + // 
Optimized + break; + case VK_COMPARE_OP_EQUAL: + zTest = CmpEQ(zValue, Z); + break; + case VK_COMPARE_OP_NOT_EQUAL: + zTest = CmpNEQ(zValue, Z); + break; + case VK_COMPARE_OP_LESS: + zTest = CmpNLE(zValue, Z); + break; + case VK_COMPARE_OP_GREATER_OR_EQUAL: + zTest = CmpLE(zValue, Z); + break; + case VK_COMPARE_OP_LESS_OR_EQUAL: + zTest = CmpNLT(zValue, Z); + break; + case VK_COMPARE_OP_GREATER: + zTest = CmpLT(zValue, Z); + break; + default: + UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode)); } switch(state.depthCompareMode) { - case VK_COMPARE_OP_ALWAYS: - zMask = cMask; - break; - case VK_COMPARE_OP_NEVER: - zMask = 0x0; - break; - default: - zMask = SignMask(zTest) & cMask; - break; + case VK_COMPARE_OP_ALWAYS: + zMask = cMask; + break; + case VK_COMPARE_OP_NEVER: + zMask = 0x0; + break; + default: + zMask = SignMask(zTest) & cMask; + break; } if(state.stencilActive) @@ -468,7 +468,7 @@ if(q > 0) { - buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); + buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB)); } Short4 zValue; @@ -488,45 +488,45 @@ switch(state.depthCompareMode) { - case VK_COMPARE_OP_ALWAYS: - // Optimized - break; - case VK_COMPARE_OP_NEVER: - // Optimized - break; - case VK_COMPARE_OP_EQUAL: - zTest = Int4(CmpEQ(zValue, Z)); - break; - case VK_COMPARE_OP_NOT_EQUAL: - zTest = ~Int4(CmpEQ(zValue, Z)); - break; - case VK_COMPARE_OP_LESS: - zTest = Int4(CmpGT(zValue, Z)); - break; - case VK_COMPARE_OP_GREATER_OR_EQUAL: - zTest = ~Int4(CmpGT(zValue, Z)); - break; - case VK_COMPARE_OP_LESS_OR_EQUAL: - zTest = ~Int4(CmpGT(Z, zValue)); - break; - case VK_COMPARE_OP_GREATER: - zTest = Int4(CmpGT(Z, zValue)); - break; - default: - UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode)); + case VK_COMPARE_OP_ALWAYS: + // Optimized + break; + case VK_COMPARE_OP_NEVER: + // Optimized + break; + case VK_COMPARE_OP_EQUAL: + zTest = Int4(CmpEQ(zValue, Z)); + break; + case VK_COMPARE_OP_NOT_EQUAL: + zTest = 
~Int4(CmpEQ(zValue, Z)); + break; + case VK_COMPARE_OP_LESS: + zTest = Int4(CmpGT(zValue, Z)); + break; + case VK_COMPARE_OP_GREATER_OR_EQUAL: + zTest = ~Int4(CmpGT(zValue, Z)); + break; + case VK_COMPARE_OP_LESS_OR_EQUAL: + zTest = ~Int4(CmpGT(Z, zValue)); + break; + case VK_COMPARE_OP_GREATER: + zTest = Int4(CmpGT(Z, zValue)); + break; + default: + UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode)); } switch(state.depthCompareMode) { - case VK_COMPARE_OP_ALWAYS: - zMask = cMask; - break; - case VK_COMPARE_OP_NEVER: - zMask = 0x0; - break; - default: - zMask = SignMask(zTest) & cMask; - break; + case VK_COMPARE_OP_ALWAYS: + zMask = cMask; + break; + case VK_COMPARE_OP_NEVER: + zMask = 0x0; + break; + default: + zMask = SignMask(zTest) & cMask; + break; } if(state.stencilActive) @@ -552,10 +552,10 @@ void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha) { - Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0))); - Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1))); - Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2))); - Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3))); + Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c0))); + Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c1))); + Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c2))); + Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c3))); Int aMask0 = SignMask(coverage0); Int aMask1 = SignMask(coverage1); @@ -578,11 +578,11 @@ } Pointer<Byte> buffer = zBuffer + 4 * x; - Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); + Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB)); if(q > 0) { - buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); + buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB)); } Float4 zValue; @@ -594,8 
+594,8 @@ zValue.zw = *Pointer<Float4>(buffer + pitch - 8); } - Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); - zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); + Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16)); + zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16)); Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue)); // FIXME: Properly optimizes? @@ -613,11 +613,11 @@ } Pointer<Byte> buffer = zBuffer + 2 * x; - Int pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); + Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB)); if(q > 0) { - buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); + buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB)); } Short4 zValue; @@ -629,15 +629,15 @@ zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1)); } - Z = Z & *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q) + zMask * 8, 8); - zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q) + zMask * 8, 8); + Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8); + zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8); Z = Z | zValue; // FIXME: Properly optimizes? 
*Pointer<Short>(buffer) = Extract(Z, 0); - *Pointer<Short>(buffer+2) = Extract(Z, 1); - *Pointer<Short>(buffer+pitch) = Extract(Z, 2); - *Pointer<Short>(buffer+pitch+2) = Extract(Z, 3); + *Pointer<Short>(buffer + 2) = Extract(Z, 1); + *Pointer<Short>(buffer + pitch) = Extract(Z, 2); + *Pointer<Short>(buffer + pitch + 2) = Extract(Z, 3); } void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask) @@ -677,7 +677,7 @@ if(q > 0) { - buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); + buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB)); } Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB)); @@ -686,11 +686,11 @@ Byte8 newValue; stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask); - if((state.frontStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer + if((state.frontStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer { Byte8 maskedValue = bufferValue; - newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ)); - maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); + newValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].writeMaskQ)); + maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].invWriteMaskQ)); newValue |= maskedValue; } @@ -698,20 +698,20 @@ stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask, sMask); - if((state.backStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer + if((state.backStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer { Byte8 maskedValue = bufferValue; - newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ)); - maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); + newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].writeMaskQ)); + maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, 
stencil[1].invWriteMaskQ)); newValueBack |= maskedValue; } - newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); - newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); + newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)); + newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)); newValue |= newValueBack; - newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); - bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); + newValue &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * cMask); + bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * cMask); newValue |= bufferValue; *Pointer<Short>(buffer) = Extract(As<Short4>(newValue), 0); @@ -738,15 +738,15 @@ if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp) { - if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same + if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same { - pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); - zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); + pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * zMask); + zFail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * zMask); pass |= zFail; } - pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); - fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); + pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * sMask); + fail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * sMask); pass |= fail; } } @@ -755,32 +755,32 @@ { switch(operation) { - case VK_STENCIL_OP_KEEP: - output = bufferValue; - break; - case VK_STENCIL_OP_ZERO: - output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - 
break; - case VK_STENCIL_OP_REPLACE: - output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceQ)); - break; - case VK_STENCIL_OP_INCREMENT_AND_CLAMP: - output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); - break; - case VK_STENCIL_OP_DECREMENT_AND_CLAMP: - output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); - break; - case VK_STENCIL_OP_INVERT: - output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - break; - case VK_STENCIL_OP_INCREMENT_AND_WRAP: - output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); - break; - case VK_STENCIL_OP_DECREMENT_AND_WRAP: - output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); - break; - default: - UNIMPLEMENTED("VkStencilOp: %d", int(operation)); + case VK_STENCIL_OP_KEEP: + output = bufferValue; + break; + case VK_STENCIL_OP_ZERO: + output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + break; + case VK_STENCIL_OP_REPLACE: + output = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ)); + break; + case VK_STENCIL_OP_INCREMENT_AND_CLAMP: + output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); + break; + case VK_STENCIL_OP_DECREMENT_AND_CLAMP: + output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); + break; + case VK_STENCIL_OP_INVERT: + output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + break; + case VK_STENCIL_OP_INCREMENT_AND_WRAP: + output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); + break; + case VK_STENCIL_OP_DECREMENT_AND_WRAP: + output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); + break; + default: + UNIMPLEMENTED("VkStencilOp: %d", int(operation)); } } @@ -788,80 +788,80 @@ { switch(blendFactorActive) { - case VK_BLEND_FACTOR_ZERO: - // Optimized - break; - case VK_BLEND_FACTOR_ONE: - // Optimized - break; - case VK_BLEND_FACTOR_SRC_COLOR: - blendFactor.x = current.x; - blendFactor.y = current.y; - blendFactor.z = current.z; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - 
blendFactor.x = Short4(0xFFFFu) - current.x; - blendFactor.y = Short4(0xFFFFu) - current.y; - blendFactor.z = Short4(0xFFFFu) - current.z; - break; - case VK_BLEND_FACTOR_DST_COLOR: - blendFactor.x = pixel.x; - blendFactor.y = pixel.y; - blendFactor.z = pixel.z; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - blendFactor.x = Short4(0xFFFFu) - pixel.x; - blendFactor.y = Short4(0xFFFFu) - pixel.y; - blendFactor.z = Short4(0xFFFFu) - pixel.z; - break; - case VK_BLEND_FACTOR_SRC_ALPHA: - blendFactor.x = current.w; - blendFactor.y = current.w; - blendFactor.z = current.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - blendFactor.x = Short4(0xFFFFu) - current.w; - blendFactor.y = Short4(0xFFFFu) - current.w; - blendFactor.z = Short4(0xFFFFu) - current.w; - break; - case VK_BLEND_FACTOR_DST_ALPHA: - blendFactor.x = pixel.w; - blendFactor.y = pixel.w; - blendFactor.z = pixel.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - blendFactor.x = Short4(0xFFFFu) - pixel.w; - blendFactor.y = Short4(0xFFFFu) - pixel.w; - blendFactor.z = Short4(0xFFFFu) - pixel.w; - break; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - blendFactor.x = Short4(0xFFFFu) - pixel.w; - blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); - blendFactor.y = blendFactor.x; - blendFactor.z = blendFactor.x; - break; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0])); - blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1])); - blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2])); - break; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); - blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); - blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); - break; - case 
VK_BLEND_FACTOR_CONSTANT_ALPHA: - blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); - blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); - blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); - break; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - break; - default: - UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive)); + case VK_BLEND_FACTOR_ZERO: + // Optimized + break; + case VK_BLEND_FACTOR_ONE: + // Optimized + break; + case VK_BLEND_FACTOR_SRC_COLOR: + blendFactor.x = current.x; + blendFactor.y = current.y; + blendFactor.z = current.z; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + blendFactor.x = Short4(0xFFFFu) - current.x; + blendFactor.y = Short4(0xFFFFu) - current.y; + blendFactor.z = Short4(0xFFFFu) - current.z; + break; + case VK_BLEND_FACTOR_DST_COLOR: + blendFactor.x = pixel.x; + blendFactor.y = pixel.y; + blendFactor.z = pixel.z; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + blendFactor.x = Short4(0xFFFFu) - pixel.x; + blendFactor.y = Short4(0xFFFFu) - pixel.y; + blendFactor.z = Short4(0xFFFFu) - pixel.z; + break; + case VK_BLEND_FACTOR_SRC_ALPHA: + blendFactor.x = current.w; + blendFactor.y = current.w; + blendFactor.z = current.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + blendFactor.x = Short4(0xFFFFu) - current.w; + blendFactor.y = Short4(0xFFFFu) - current.w; + blendFactor.z = Short4(0xFFFFu) - current.w; + break; + case VK_BLEND_FACTOR_DST_ALPHA: + blendFactor.x = pixel.w; + blendFactor.y = pixel.w; + blendFactor.z = pixel.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + blendFactor.x = Short4(0xFFFFu) - pixel.w; + blendFactor.y = 
Short4(0xFFFFu) - pixel.w; + blendFactor.z = Short4(0xFFFFu) - pixel.w; + break; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + blendFactor.x = Short4(0xFFFFu) - pixel.w; + blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); + blendFactor.y = blendFactor.x; + blendFactor.z = blendFactor.x; + break; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[0])); + blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[1])); + blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[2])); + break; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[0])); + blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[1])); + blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[2])); + break; + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3])); + blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3])); + blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3])); + break; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3])); + blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3])); + blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3])); + break; + default: + UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive)); } } @@ -869,49 +869,49 @@ { switch(blendFactorAlphaActive) { - case VK_BLEND_FACTOR_ZERO: - // Optimized - break; - case VK_BLEND_FACTOR_ONE: - // Optimized - break; - case VK_BLEND_FACTOR_SRC_COLOR: - blendFactor.w = current.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - blendFactor.w = Short4(0xFFFFu) - 
current.w; - break; - case VK_BLEND_FACTOR_DST_COLOR: - blendFactor.w = pixel.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - blendFactor.w = Short4(0xFFFFu) - pixel.w; - break; - case VK_BLEND_FACTOR_SRC_ALPHA: - blendFactor.w = current.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - blendFactor.w = Short4(0xFFFFu) - current.w; - break; - case VK_BLEND_FACTOR_DST_ALPHA: - blendFactor.w = pixel.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - blendFactor.w = Short4(0xFFFFu) - pixel.w; - break; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - blendFactor.w = Short4(0xFFFFu); - break; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); - break; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - break; - default: - UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive)); + case VK_BLEND_FACTOR_ZERO: + // Optimized + break; + case VK_BLEND_FACTOR_ONE: + // Optimized + break; + case VK_BLEND_FACTOR_SRC_COLOR: + blendFactor.w = current.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + blendFactor.w = Short4(0xFFFFu) - current.w; + break; + case VK_BLEND_FACTOR_DST_COLOR: + blendFactor.w = pixel.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + blendFactor.w = Short4(0xFFFFu) - pixel.w; + break; + case VK_BLEND_FACTOR_SRC_ALPHA: + blendFactor.w = current.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + blendFactor.w = Short4(0xFFFFu) - current.w; + break; + case VK_BLEND_FACTOR_DST_ALPHA: + blendFactor.w = pixel.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + blendFactor.w = Short4(0xFFFFu) - pixel.w; + break; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + blendFactor.w = Short4(0xFFFFu); + break; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + case 
VK_BLEND_FACTOR_CONSTANT_ALPHA: + blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3])); + break; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3])); + break; + default: + UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive)); } } @@ -929,139 +929,140 @@ switch(state.targetFormat[index]) { - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - buffer += 2 * x; - buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + buffer += 2 * x; + buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); - pixel.x = (c01 & Short4(0x7C00u)) << 1; - pixel.y = (c01 & Short4(0x03E0u)) << 6; - pixel.z = (c01 & Short4(0x001Fu)) << 11; - pixel.w = (c01 & Short4(0x8000u)) >> 15; + pixel.x = (c01 & Short4(0x7C00u)) << 1; + pixel.y = (c01 & Short4(0x03E0u)) << 6; + pixel.z = (c01 & Short4(0x001Fu)) << 11; + pixel.w = (c01 & Short4(0x8000u)) >> 15; - // Expand to 16 bit range - pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5); - pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10); - pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5); - pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10); - pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5); - pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10); - break; - case VK_FORMAT_R5G6B5_UNORM_PACK16: - buffer += 2 * x; - buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); + // Expand to 16 bit range + pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5); + pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10); + pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5); + pixel.y |= 
As<Short4>(As<UShort4>(pixel.y) >> 10); + pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5); + pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10); + break; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + buffer += 2 * x; + buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); - pixel.x = c01 & Short4(0xF800u); - pixel.y = (c01 & Short4(0x07E0u)) << 5; - pixel.z = (c01 & Short4(0x001Fu)) << 11; - pixel.w = Short4(0xFFFFu); + pixel.x = c01 & Short4(0xF800u); + pixel.y = (c01 & Short4(0x07E0u)) << 5; + pixel.z = (c01 & Short4(0x001Fu)) << 11; + pixel.w = Short4(0xFFFFu); - // Expand to 16 bit range - pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5); - pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10); - pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6); - pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12); - pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5); - pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10); - break; - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - buffer += 4 * x; - c01 = *Pointer<Short4>(buffer); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - c23 = *Pointer<Short4>(buffer); - pixel.z = c01; - pixel.y = c01; - pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); - pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); - pixel.x = pixel.z; - pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); - pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); - pixel.y = pixel.z; - pixel.w = pixel.x; - pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); - pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); - pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); - pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); - break; - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - buffer += 4 * x; - c01 = *Pointer<Short4>(buffer); - buffer += *Pointer<Int>(data + 
OFFSET(DrawData, colorPitchB[index])); - c23 = *Pointer<Short4>(buffer); - pixel.z = c01; - pixel.y = c01; - pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); - pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); - pixel.x = pixel.z; - pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); - pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); - pixel.y = pixel.z; - pixel.w = pixel.x; - pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); - pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); - pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); - pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); - break; - case VK_FORMAT_R8_UNORM: - buffer += 1 * x; - pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1); - pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); - pixel.y = Short4(0x0000); - pixel.z = Short4(0x0000); - pixel.w = Short4(0xFFFFu); - break; - case VK_FORMAT_R8G8_UNORM: - buffer += 2 * x; - c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1)); - pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8); - pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8); - pixel.z = Short4(0x0000u); - pixel.w = Short4(0xFFFFu); - break; - case VK_FORMAT_R16G16B16A16_UNORM: - pixel.x = *Pointer<Short4>(buffer + 8 * x); - pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.z = *Pointer<Short4>(buffer + 8 * x); - pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); - transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); - break; - case VK_FORMAT_R16G16_UNORM: - pixel.x = *Pointer<Short4>(buffer + 4 * x); - buffer += *Pointer<Int>(data + 
OFFSET(DrawData, colorPitchB[index])); - pixel.y = *Pointer<Short4>(buffer + 4 * x); - pixel.z = pixel.x; - pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); - pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); - pixel.y = pixel.z; - pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); - pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); - pixel.z = Short4(0xFFFFu); - pixel.w = Short4(0xFFFFu); - break; - case VK_FORMAT_A2B10G10R10_UNORM_PACK32: - { - Int4 v = Int4(0); - v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0); - v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2); - v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3); + // Expand to 16 bit range + pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5); + pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10); + pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6); + pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12); + pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5); + pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10); + break; + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + buffer += 4 * x; + c01 = *Pointer<Short4>(buffer); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + c23 = *Pointer<Short4>(buffer); + pixel.z = c01; + pixel.y = c01; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); + pixel.x = pixel.z; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); + pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); + pixel.y = pixel.z; + pixel.w = pixel.x; + pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); + pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); + break; + case VK_FORMAT_R8G8B8A8_UNORM: + case 
VK_FORMAT_R8G8B8A8_SRGB: + buffer += 4 * x; + c01 = *Pointer<Short4>(buffer); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + c23 = *Pointer<Short4>(buffer); + pixel.z = c01; + pixel.y = c01; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); + pixel.x = pixel.z; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); + pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); + pixel.y = pixel.z; + pixel.w = pixel.x; + pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); + pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); + pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); + break; + case VK_FORMAT_R8_UNORM: + buffer += 1 * x; + pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1); + pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); + pixel.y = Short4(0x0000); + pixel.z = Short4(0x0000); + pixel.w = Short4(0xFFFFu); + break; + case VK_FORMAT_R8G8_UNORM: + buffer += 2 * x; + c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1)); + pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8); + pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8); + pixel.z = Short4(0x0000u); + pixel.w = Short4(0xFFFFu); + break; + case VK_FORMAT_R16G16B16A16_UNORM: + pixel.x = *Pointer<Short4>(buffer + 8 * x); + pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.z = *Pointer<Short4>(buffer + 8 * x); + pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); + transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); + break; + case 
VK_FORMAT_R16G16_UNORM: + pixel.x = *Pointer<Short4>(buffer + 4 * x); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.y = *Pointer<Short4>(buffer + 4 * x); + pixel.z = pixel.x; + pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); + pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); + pixel.y = pixel.z; + pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); + pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); + pixel.z = Short4(0xFFFFu); + pixel.w = Short4(0xFFFFu); + break; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + { + Int4 v = Int4(0); + v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0); + v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2); + v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3); - pixel.x = Short4(v << 6) & Short4(0xFFC0u); - pixel.y = Short4(v >> 4) & Short4(0xFFC0u); - pixel.z = Short4(v >> 14) & Short4(0xFFC0u); - pixel.w = Short4(v >> 16) & Short4(0xC000u); - } break; - default: - UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]); + pixel.x = Short4(v << 6) & Short4(0xFFC0u); + pixel.y = Short4(v >> 4) & Short4(0xFFC0u); + pixel.z = Short4(v >> 14) & Short4(0xFFC0u); + pixel.w = Short4(v >> 16) & Short4(0xC000u); + } + break; + default: + UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]); } if(isSRGB(index)) @@ -1103,46 +1104,46 @@ switch(state.blendState[index].blendOperation) { - case VK_BLEND_OP_ADD: - current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); - current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); - current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); - break; - case VK_BLEND_OP_SUBTRACT: - current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); - current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); - current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); - break; - case 
VK_BLEND_OP_REVERSE_SUBTRACT: - current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); - current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); - current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); - break; - case VK_BLEND_OP_MIN: - current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); - current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); - current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); - break; - case VK_BLEND_OP_MAX: - current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); - current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); - current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); - break; - case VK_BLEND_OP_SRC_EXT: - // No operation - break; - case VK_BLEND_OP_DST_EXT: - current.x = pixel.x; - current.y = pixel.y; - current.z = pixel.z; - break; - case VK_BLEND_OP_ZERO_EXT: - current.x = Short4(0x0000); - current.y = Short4(0x0000); - current.z = Short4(0x0000); - break; - default: - UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation)); + case VK_BLEND_OP_ADD: + current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); + break; + case VK_BLEND_OP_SUBTRACT: + current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); + break; + case VK_BLEND_OP_REVERSE_SUBTRACT: + current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); + current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); + current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); + break; + case VK_BLEND_OP_MIN: + current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = 
Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); + break; + case VK_BLEND_OP_MAX: + current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); + break; + case VK_BLEND_OP_SRC_EXT: + // No operation + break; + case VK_BLEND_OP_DST_EXT: + current.x = pixel.x; + current.y = pixel.y; + current.z = pixel.z; + break; + case VK_BLEND_OP_ZERO_EXT: + current.x = Short4(0x0000); + current.y = Short4(0x0000); + current.z = Short4(0x0000); + break; + default: + UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation)); } blendFactorAlpha(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactorAlpha); @@ -1160,32 +1161,32 @@ switch(state.blendState[index].blendOperationAlpha) { - case VK_BLEND_OP_ADD: - current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); - break; - case VK_BLEND_OP_SUBTRACT: - current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); - break; - case VK_BLEND_OP_REVERSE_SUBTRACT: - current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); - break; - case VK_BLEND_OP_MIN: - current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); - break; - case VK_BLEND_OP_MAX: - current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); - break; - case VK_BLEND_OP_SRC_EXT: - // No operation - break; - case VK_BLEND_OP_DST_EXT: - current.w = pixel.w; - break; - case VK_BLEND_OP_ZERO_EXT: - current.w = Short4(0x0000); - break; - default: - UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha)); + case VK_BLEND_OP_ADD: + current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); + break; + case VK_BLEND_OP_SUBTRACT: + current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); + break; + case VK_BLEND_OP_REVERSE_SUBTRACT: + current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); + break; + case VK_BLEND_OP_MIN: + current.w = 
Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); + break; + case VK_BLEND_OP_MAX: + current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); + break; + case VK_BLEND_OP_SRC_EXT: + // No operation + break; + case VK_BLEND_OP_DST_EXT: + current.w = pixel.w; + break; + case VK_BLEND_OP_ZERO_EXT: + current.w = Short4(0x0000); + break; + default: + UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha)); } } @@ -1198,21 +1199,21 @@ switch(state.targetFormat[index]) { - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_A8B8G8R8_SRGB_PACK32: - current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080); - current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080); - current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080); - current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080); - break; - default: - break; + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080); + current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080); + current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080); + current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080); + break; + default: + break; } int rgbaWriteMask = state.colorWriteActive(index); @@ -1220,7 +1221,7 @@ switch(state.targetFormat[index]) { - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: { current.w = current.w & Short4(0x8000u); 
current.x = As<UShort4>(current.x & Short4(0xF800)) >> 1; @@ -1230,7 +1231,7 @@ current.x = current.x | current.y | current.z | current.w; } break; - case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: { current.x = current.x & Short4(0xF800u); current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5; @@ -1239,121 +1240,121 @@ current.x = current.x | current.y | current.z; } break; - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - if(rgbaWriteMask == 0x7) - { + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + if(rgbaWriteMask == 0x7) + { + current.x = As<Short4>(As<UShort4>(current.x) >> 8); + current.y = As<Short4>(As<UShort4>(current.y) >> 8); + current.z = As<Short4>(As<UShort4>(current.z) >> 8); + + current.z = As<Short4>(PackUnsigned(current.z, current.x)); + current.y = As<Short4>(PackUnsigned(current.y, current.y)); + + current.x = current.z; + current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); + current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.y = current.z; + current.z = As<Short4>(UnpackLow(current.z, current.x)); + current.y = As<Short4>(UnpackHigh(current.y, current.x)); + } + else + { + current.x = As<Short4>(As<UShort4>(current.x) >> 8); + current.y = As<Short4>(As<UShort4>(current.y) >> 8); + current.z = As<Short4>(As<UShort4>(current.z) >> 8); + current.w = As<Short4>(As<UShort4>(current.w) >> 8); + + current.z = As<Short4>(PackUnsigned(current.z, current.x)); + current.y = As<Short4>(PackUnsigned(current.y, current.w)); + + current.x = current.z; + current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); + current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.y = current.z; + current.z = As<Short4>(UnpackLow(current.z, current.x)); + current.y = As<Short4>(UnpackHigh(current.y, current.x)); + } + break; + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case 
VK_FORMAT_A8B8G8R8_SRGB_PACK32: + if(rgbaWriteMask == 0x7) + { + current.x = As<Short4>(As<UShort4>(current.x) >> 8); + current.y = As<Short4>(As<UShort4>(current.y) >> 8); + current.z = As<Short4>(As<UShort4>(current.z) >> 8); + + current.z = As<Short4>(PackUnsigned(current.x, current.z)); + current.y = As<Short4>(PackUnsigned(current.y, current.y)); + + current.x = current.z; + current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); + current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.y = current.z; + current.z = As<Short4>(UnpackLow(current.z, current.x)); + current.y = As<Short4>(UnpackHigh(current.y, current.x)); + } + else + { + current.x = As<Short4>(As<UShort4>(current.x) >> 8); + current.y = As<Short4>(As<UShort4>(current.y) >> 8); + current.z = As<Short4>(As<UShort4>(current.z) >> 8); + current.w = As<Short4>(As<UShort4>(current.w) >> 8); + + current.z = As<Short4>(PackUnsigned(current.x, current.z)); + current.y = As<Short4>(PackUnsigned(current.y, current.w)); + + current.x = current.z; + current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); + current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.y = current.z; + current.z = As<Short4>(UnpackLow(current.z, current.x)); + current.y = As<Short4>(UnpackHigh(current.y, current.x)); + } + break; + case VK_FORMAT_R8G8_UNORM: current.x = As<Short4>(As<UShort4>(current.x) >> 8); current.y = As<Short4>(As<UShort4>(current.y) >> 8); - current.z = As<Short4>(As<UShort4>(current.z) >> 8); - - current.z = As<Short4>(PackUnsigned(current.z, current.x)); + current.x = As<Short4>(PackUnsigned(current.x, current.x)); current.y = As<Short4>(PackUnsigned(current.y, current.y)); - - current.x = current.z; - current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); - current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); - current.y = current.z; - current.z = As<Short4>(UnpackLow(current.z, current.x)); - current.y = 
As<Short4>(UnpackHigh(current.y, current.x)); - } - else - { + current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); + break; + case VK_FORMAT_R8_UNORM: current.x = As<Short4>(As<UShort4>(current.x) >> 8); - current.y = As<Short4>(As<UShort4>(current.y) >> 8); - current.z = As<Short4>(As<UShort4>(current.z) >> 8); - current.w = As<Short4>(As<UShort4>(current.w) >> 8); - - current.z = As<Short4>(PackUnsigned(current.z, current.x)); - current.y = As<Short4>(PackUnsigned(current.y, current.w)); - - current.x = current.z; - current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); - current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.x = As<Short4>(PackUnsigned(current.x, current.x)); + break; + case VK_FORMAT_R16G16_UNORM: + current.z = current.x; + current.x = As<Short4>(UnpackLow(current.x, current.y)); + current.z = As<Short4>(UnpackHigh(current.z, current.y)); current.y = current.z; - current.z = As<Short4>(UnpackLow(current.z, current.x)); - current.y = As<Short4>(UnpackHigh(current.y, current.x)); - } - break; - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_A8B8G8R8_SRGB_PACK32: - if(rgbaWriteMask == 0x7) + break; + case VK_FORMAT_R16G16B16A16_UNORM: + transpose4x4(current.x, current.y, current.z, current.w); + break; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: { - current.x = As<Short4>(As<UShort4>(current.x) >> 8); - current.y = As<Short4>(As<UShort4>(current.y) >> 8); - current.z = As<Short4>(As<UShort4>(current.z) >> 8); - - current.z = As<Short4>(PackUnsigned(current.x, current.z)); - current.y = As<Short4>(PackUnsigned(current.y, current.y)); - - current.x = current.z; - current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); - current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); - current.y = current.z; - current.z = As<Short4>(UnpackLow(current.z, current.x)); - current.y = As<Short4>(UnpackHigh(current.y, 
current.x)); + auto r = (Int4(current.x) >> 6) & Int4(0x3ff); + auto g = (Int4(current.y) >> 6) & Int4(0x3ff); + auto b = (Int4(current.z) >> 6) & Int4(0x3ff); + auto a = (Int4(current.w) >> 14) & Int4(0x3); + Int4 packed = (a << 30) | (b << 20) | (g << 10) | r; + auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz; + auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw; + current.x = UnpackLow(c02, c13); + current.y = UnpackHigh(c02, c13); + break; } - else - { - current.x = As<Short4>(As<UShort4>(current.x) >> 8); - current.y = As<Short4>(As<UShort4>(current.y) >> 8); - current.z = As<Short4>(As<UShort4>(current.z) >> 8); - current.w = As<Short4>(As<UShort4>(current.w) >> 8); - - current.z = As<Short4>(PackUnsigned(current.x, current.z)); - current.y = As<Short4>(PackUnsigned(current.y, current.w)); - - current.x = current.z; - current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); - current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); - current.y = current.z; - current.z = As<Short4>(UnpackLow(current.z, current.x)); - current.y = As<Short4>(UnpackHigh(current.y, current.x)); - } - break; - case VK_FORMAT_R8G8_UNORM: - current.x = As<Short4>(As<UShort4>(current.x) >> 8); - current.y = As<Short4>(As<UShort4>(current.y) >> 8); - current.x = As<Short4>(PackUnsigned(current.x, current.x)); - current.y = As<Short4>(PackUnsigned(current.y, current.y)); - current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); - break; - case VK_FORMAT_R8_UNORM: - current.x = As<Short4>(As<UShort4>(current.x) >> 8); - current.x = As<Short4>(PackUnsigned(current.x, current.x)); - break; - case VK_FORMAT_R16G16_UNORM: - current.z = current.x; - current.x = As<Short4>(UnpackLow(current.x, current.y)); - current.z = As<Short4>(UnpackHigh(current.z, current.y)); - current.y = current.z; - break; - case VK_FORMAT_R16G16B16A16_UNORM: - transpose4x4(current.x, current.y, current.z, current.w); - break; - case 
VK_FORMAT_A2B10G10R10_UNORM_PACK32: - { - auto r = (Int4(current.x) >> 6) & Int4(0x3ff); - auto g = (Int4(current.y) >> 6) & Int4(0x3ff); - auto b = (Int4(current.z) >> 6) & Int4(0x3ff); - auto a = (Int4(current.w) >> 14) & Int4(0x3); - Int4 packed = (a << 30) | (b << 20) | (g << 10) | r; - auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz; - auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw; - current.x = UnpackLow(c02, c13); - current.y = UnpackHigh(c02, c13); - break; - } - default: - UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); + default: + UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); } Short4 c01 = current.z; Short4 c23 = current.y; - Int xMask; // Combination of all masks + Int xMask; // Combination of all masks if(state.depthTestActive) { @@ -1373,26 +1374,26 @@ switch(state.targetFormat[index]) { - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: { buffer += 2 * x; Int value = *Pointer<Int>(buffer); - Int channelMask = *Pointer<Int>(constants + OFFSET(Constants,mask5551Q[bgraWriteMask & 0xF][0])); + Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask5551Q[bgraWriteMask & 0xF][0])); Int c01 = Extract(As<Int2>(current.x), 0); - Int mask01 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); + Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8); if(bgraWriteMask != 0x0000000F) { mask01 &= channelMask; } *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); value = *Pointer<Int>(buffer); Int c23 = Extract(As<Int2>(current.x), 1); - Int mask23 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); + Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8); if(bgraWriteMask != 0x0000000F) { mask23 &= 
channelMask; @@ -1400,26 +1401,26 @@ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23); } break; - case VK_FORMAT_R5G6B5_UNORM_PACK16: + case VK_FORMAT_R5G6B5_UNORM_PACK16: { buffer += 2 * x; Int value = *Pointer<Int>(buffer); - Int channelMask = *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); + Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask565Q[bgraWriteMask & 0x7][0])); Int c01 = Extract(As<Int2>(current.x), 0); - Int mask01 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); + Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8); if((bgraWriteMask & 0x00000007) != 0x00000007) { mask01 &= channelMask; } *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); value = *Pointer<Int>(buffer); Int c23 = Extract(As<Int2>(current.x), 1); - Int mask23 = *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); + Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8); if((bgraWriteMask & 0x00000007) != 0x00000007) { mask23 &= channelMask; @@ -1427,24 +1428,24 @@ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23); } break; - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: { buffer += x * 4; Short4 value = *Pointer<Short4>(buffer); - Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); + Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[bgraWriteMask][0])); - Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); + Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); if(bgraWriteMask != 0x0000000F) { mask01 &= channelMask; } *Pointer<Short4>(buffer) = (c01 
& mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); value = *Pointer<Short4>(buffer); - Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); + Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); if(bgraWriteMask != 0x0000000F) { mask23 &= channelMask; @@ -1452,26 +1453,26 @@ *Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23); } break; - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: { buffer += x * 4; Short4 value = *Pointer<Short4>(buffer); - Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); + Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); - Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); + Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); if(rgbaWriteMask != 0x0000000F) { mask01 &= channelMask; } *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); value = *Pointer<Short4>(buffer); - Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); + Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); if(rgbaWriteMask != 0x0000000F) { mask23 &= channelMask; @@ -1479,49 +1480,49 @@ *Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23); } break; - case VK_FORMAT_R8G8_UNORM: - if((rgbaWriteMask & 0x00000003) != 0x0) - { - buffer += 2 * x; - Int2 value; - 
value = Insert(value, *Pointer<Int>(buffer), 0); - Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = Insert(value, *Pointer<Int>(buffer + pitch), 1); - - Int2 packedCol = As<Int2>(current.x); - - UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); - if((rgbaWriteMask & 0x3) != 0x3) + case VK_FORMAT_R8G8_UNORM: + if((rgbaWriteMask & 0x00000003) != 0x0) { - Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); - UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); - mergedMask &= rgbaMask; + buffer += 2 * x; + Int2 value; + value = Insert(value, *Pointer<Int>(buffer), 0); + Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + value = Insert(value, *Pointer<Int>(buffer + pitch), 1); + + Int2 packedCol = As<Int2>(current.x); + + UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); + UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + + packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); + + *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); + *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); } + break; + case VK_FORMAT_R8_UNORM: + if(rgbaWriteMask & 0x00000001) + { + buffer += 1 * x; + Short4 value; + value = Insert(value, *Pointer<Short>(buffer), 0); + Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + value = Insert(value, *Pointer<Short>(buffer + pitch), 1); - packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); + current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); + current.x |= value; - 
*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); - *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); - } - break; - case VK_FORMAT_R8_UNORM: - if(rgbaWriteMask & 0x00000001) - { - buffer += 1 * x; - Short4 value; - value = Insert(value, *Pointer<Short>(buffer), 0); - Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = Insert(value, *Pointer<Short>(buffer + pitch), 1); - - current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); - current.x |= value; - - *Pointer<Short>(buffer) = Extract(current.x, 0); - *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); - } - break; - case VK_FORMAT_R16G16_UNORM: + *Pointer<Short>(buffer) = Extract(current.x, 0); + *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); + } + break; + case VK_FORMAT_R16G16_UNORM: { buffer += 4 * x; @@ -1530,35 +1531,35 @@ if((rgbaWriteMask & 0x00000003) != 0x00000003) { Short4 masked = value; - current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); + current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0])); + masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0])); current.x |= masked; } - current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); + current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD01Q) + xMask * 8); current.x |= value; *Pointer<Short4>(buffer) = current.x; - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); value 
= *Pointer<Short4>(buffer); if((rgbaWriteMask & 0x00000003) != 0x00000003) { Short4 masked = value; - current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); + current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0])); + masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0])); current.y |= masked; } - current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); + current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD23Q) + xMask * 8); current.y |= value; *Pointer<Short4>(buffer) = current.y; } break; - case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_R16G16B16A16_UNORM: { buffer += 8 * x; @@ -1568,13 +1569,13 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); current.x |= masked; } - current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); + current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ0Q) + xMask * 8); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ0Q) + xMask * 8); current.x |= value; *Pointer<Short4>(buffer) = current.x; } @@ -1585,18 +1586,18 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.y &= *Pointer<Short4>(constants + 
OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); current.y |= masked; } - current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); + current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ1Q) + xMask * 8); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ1Q) + xMask * 8); current.y |= value; *Pointer<Short4>(buffer + 8) = current.y; } - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); { Short4 value = *Pointer<Short4>(buffer); @@ -1604,13 +1605,13 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); current.z |= masked; } - current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); + current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ2Q) + xMask * 8); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ2Q) + xMask * 8); current.z |= value; *Pointer<Short4>(buffer) = current.z; } @@ -1621,13 +1622,13 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants 
+ OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); current.w |= masked; } - current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); + current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ3Q) + xMask * 8); + value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ3Q) + xMask * 8); current.w |= value; *Pointer<Short4>(buffer + 8) = current.w; } @@ -1656,8 +1657,8 @@ *Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask); } break; - default: - UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); + default: + UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); } } @@ -1665,85 +1666,85 @@ { switch(blendFactorActive) { - case VK_BLEND_FACTOR_ZERO: - blendFactor.x = Float4(0); - blendFactor.y = Float4(0); - blendFactor.z = Float4(0); - break; - case VK_BLEND_FACTOR_ONE: - blendFactor.x = Float4(1); - blendFactor.y = Float4(1); - blendFactor.z = Float4(1); - break; - case VK_BLEND_FACTOR_SRC_COLOR: - blendFactor.x = oC.x; - blendFactor.y = oC.y; - blendFactor.z = oC.z; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - blendFactor.x = Float4(1.0f) - oC.x; - blendFactor.y = Float4(1.0f) - oC.y; - blendFactor.z = Float4(1.0f) - oC.z; - break; - case VK_BLEND_FACTOR_DST_COLOR: - blendFactor.x = pixel.x; - blendFactor.y = pixel.y; - blendFactor.z = pixel.z; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - blendFactor.x = Float4(1.0f) - pixel.x; - blendFactor.y = Float4(1.0f) - pixel.y; - blendFactor.z = Float4(1.0f) - pixel.z; - break; - case VK_BLEND_FACTOR_SRC_ALPHA: - blendFactor.x = oC.w; - blendFactor.y = oC.w; - blendFactor.z = oC.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - blendFactor.x = 
Float4(1.0f) - oC.w; - blendFactor.y = Float4(1.0f) - oC.w; - blendFactor.z = Float4(1.0f) - oC.w; - break; - case VK_BLEND_FACTOR_DST_ALPHA: - blendFactor.x = pixel.w; - blendFactor.y = pixel.w; - blendFactor.z = pixel.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - blendFactor.x = Float4(1.0f) - pixel.w; - blendFactor.y = Float4(1.0f) - pixel.w; - blendFactor.z = Float4(1.0f) - pixel.w; - break; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - blendFactor.x = Float4(1.0f) - pixel.w; - blendFactor.x = Min(blendFactor.x, oC.w); - blendFactor.y = blendFactor.x; - blendFactor.z = blendFactor.x; - break; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0])); - blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1])); - blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2])); - break; - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); - blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); - blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); - break; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0])); - blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1])); - blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2])); - break; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); - blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); - blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); - break; + case VK_BLEND_FACTOR_ZERO: + blendFactor.x = Float4(0); + blendFactor.y = Float4(0); + blendFactor.z = 
Float4(0); + break; + case VK_BLEND_FACTOR_ONE: + blendFactor.x = Float4(1); + blendFactor.y = Float4(1); + blendFactor.z = Float4(1); + break; + case VK_BLEND_FACTOR_SRC_COLOR: + blendFactor.x = oC.x; + blendFactor.y = oC.y; + blendFactor.z = oC.z; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + blendFactor.x = Float4(1.0f) - oC.x; + blendFactor.y = Float4(1.0f) - oC.y; + blendFactor.z = Float4(1.0f) - oC.z; + break; + case VK_BLEND_FACTOR_DST_COLOR: + blendFactor.x = pixel.x; + blendFactor.y = pixel.y; + blendFactor.z = pixel.z; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + blendFactor.x = Float4(1.0f) - pixel.x; + blendFactor.y = Float4(1.0f) - pixel.y; + blendFactor.z = Float4(1.0f) - pixel.z; + break; + case VK_BLEND_FACTOR_SRC_ALPHA: + blendFactor.x = oC.w; + blendFactor.y = oC.w; + blendFactor.z = oC.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + blendFactor.x = Float4(1.0f) - oC.w; + blendFactor.y = Float4(1.0f) - oC.w; + blendFactor.z = Float4(1.0f) - oC.w; + break; + case VK_BLEND_FACTOR_DST_ALPHA: + blendFactor.x = pixel.w; + blendFactor.y = pixel.w; + blendFactor.z = pixel.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + blendFactor.x = Float4(1.0f) - pixel.w; + blendFactor.y = Float4(1.0f) - pixel.w; + blendFactor.z = Float4(1.0f) - pixel.w; + break; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + blendFactor.x = Float4(1.0f) - pixel.w; + blendFactor.x = Min(blendFactor.x, oC.w); + blendFactor.y = blendFactor.x; + blendFactor.z = blendFactor.x; + break; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[0])); + blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[1])); + blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[2])); + break; + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3])); + blendFactor.y = 
*Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3])); + blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3])); + break; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[0])); + blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[1])); + blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[2])); + break; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3])); + blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3])); + blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3])); + break; - default: - UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive)); + default: + UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive)); } } @@ -1751,49 +1752,49 @@ { switch(blendFactorAlphaActive) { - case VK_BLEND_FACTOR_ZERO: - blendFactor.w = Float4(0); - break; - case VK_BLEND_FACTOR_ONE: - blendFactor.w = Float4(1); - break; - case VK_BLEND_FACTOR_SRC_COLOR: - blendFactor.w = oC.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - blendFactor.w = Float4(1.0f) - oC.w; - break; - case VK_BLEND_FACTOR_DST_COLOR: - blendFactor.w = pixel.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - blendFactor.w = Float4(1.0f) - pixel.w; - break; - case VK_BLEND_FACTOR_SRC_ALPHA: - blendFactor.w = oC.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - blendFactor.w = Float4(1.0f) - oC.w; - break; - case VK_BLEND_FACTOR_DST_ALPHA: - blendFactor.w = pixel.w; - break; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - blendFactor.w = Float4(1.0f) - pixel.w; - break; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - blendFactor.w = Float4(1.0f); - break; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - blendFactor.w = 
*Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); - break; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); - break; - default: - UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive)); + case VK_BLEND_FACTOR_ZERO: + blendFactor.w = Float4(0); + break; + case VK_BLEND_FACTOR_ONE: + blendFactor.w = Float4(1); + break; + case VK_BLEND_FACTOR_SRC_COLOR: + blendFactor.w = oC.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + blendFactor.w = Float4(1.0f) - oC.w; + break; + case VK_BLEND_FACTOR_DST_COLOR: + blendFactor.w = pixel.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + blendFactor.w = Float4(1.0f) - pixel.w; + break; + case VK_BLEND_FACTOR_SRC_ALPHA: + blendFactor.w = oC.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + blendFactor.w = Float4(1.0f) - oC.w; + break; + case VK_BLEND_FACTOR_DST_ALPHA: + blendFactor.w = pixel.w; + break; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + blendFactor.w = Float4(1.0f) - pixel.w; + break; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + blendFactor.w = Float4(1.0f); + break; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3])); + break; + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3])); + break; + default: + UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive)); } } @@ -1829,89 +1830,89 @@ switch(state.targetFormat[index]) { - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R32_SFLOAT: - // FIXME: movlps - pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); - pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); - buffer += *Pointer<Int>(data + 
OFFSET(DrawData,colorPitchB[index])); - // FIXME: movhps - pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); - pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); - pixel.y = pixel.z = pixel.w = one; - break; - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32_UINT: - case VK_FORMAT_R32G32_SFLOAT: - pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); - pixel.z = pixel.x; - pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202); - pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313); - pixel.y = pixel.z; - pixel.z = pixel.w = one; - break; - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32G32B32A32_UINT: - pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); - pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); - pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); - transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); - break; - case VK_FORMAT_R16_SFLOAT: - pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0)); - pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2)); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0)); - pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2)); - pixel.y = pixel.z = pixel.w = one; - break; - case VK_FORMAT_R16G16_SFLOAT: - pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0)); - pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2)); - pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4)); - pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6)); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0)); - pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2)); - pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4)); - pixel.y.w = 
Float(*Pointer<Half>(buffer + 4 * x + 6)); - pixel.z = pixel.w = one; - break; - case VK_FORMAT_R16G16B16A16_SFLOAT: - pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0)); - pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2)); - pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4)); - pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6)); - pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8)); - pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa)); - pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc)); - pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe)); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0)); - pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2)); - pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4)); - pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6)); - pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8)); - pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa)); - pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc)); - pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe)); - break; - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0)); - pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4)); - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0)); - pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4)); - transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); - break; - default: - UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R32_SFLOAT: + // FIXME: movlps + pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); + pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + // FIXME: movhps + pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); + 
pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); + pixel.y = pixel.z = pixel.w = one; + break; + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32_UINT: + case VK_FORMAT_R32G32_SFLOAT: + pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); + pixel.z = pixel.x; + pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202); + pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313); + pixel.y = pixel.z; + pixel.z = pixel.w = one; + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32G32B32A32_UINT: + pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); + pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); + pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); + transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); + break; + case VK_FORMAT_R16_SFLOAT: + pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0)); + pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2)); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0)); + pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2)); + pixel.y = pixel.z = pixel.w = one; + break; + case VK_FORMAT_R16G16_SFLOAT: + pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0)); + pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2)); + pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4)); + pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6)); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0)); + pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2)); + pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4)); + pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6)); + pixel.z = pixel.w = one; + break; + case 
VK_FORMAT_R16G16B16A16_SFLOAT: + pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0)); + pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2)); + pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4)); + pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6)); + pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8)); + pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa)); + pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc)); + pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe)); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0)); + pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2)); + pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4)); + pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6)); + pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8)); + pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa)); + pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc)); + pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe)); + break; + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0)); + pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4)); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0)); + pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4)); + transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); + break; + default: + UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); } // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor @@ -1931,46 +1932,46 @@ switch(state.blendState[index].blendOperation) { - case VK_BLEND_OP_ADD: - oC.x += pixel.x; - oC.y += pixel.y; - oC.z += pixel.z; - break; - case VK_BLEND_OP_SUBTRACT: - oC.x -= pixel.x; - oC.y -= pixel.y; - oC.z -= pixel.z; - break; - case VK_BLEND_OP_REVERSE_SUBTRACT: - oC.x = pixel.x - oC.x; - oC.y = pixel.y - 
oC.y; - oC.z = pixel.z - oC.z; - break; - case VK_BLEND_OP_MIN: - oC.x = Min(oC.x, pixel.x); - oC.y = Min(oC.y, pixel.y); - oC.z = Min(oC.z, pixel.z); - break; - case VK_BLEND_OP_MAX: - oC.x = Max(oC.x, pixel.x); - oC.y = Max(oC.y, pixel.y); - oC.z = Max(oC.z, pixel.z); - break; - case VK_BLEND_OP_SRC_EXT: - // No operation - break; - case VK_BLEND_OP_DST_EXT: - oC.x = pixel.x; - oC.y = pixel.y; - oC.z = pixel.z; - break; - case VK_BLEND_OP_ZERO_EXT: - oC.x = Float4(0.0f); - oC.y = Float4(0.0f); - oC.z = Float4(0.0f); - break; - default: - UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation)); + case VK_BLEND_OP_ADD: + oC.x += pixel.x; + oC.y += pixel.y; + oC.z += pixel.z; + break; + case VK_BLEND_OP_SUBTRACT: + oC.x -= pixel.x; + oC.y -= pixel.y; + oC.z -= pixel.z; + break; + case VK_BLEND_OP_REVERSE_SUBTRACT: + oC.x = pixel.x - oC.x; + oC.y = pixel.y - oC.y; + oC.z = pixel.z - oC.z; + break; + case VK_BLEND_OP_MIN: + oC.x = Min(oC.x, pixel.x); + oC.y = Min(oC.y, pixel.y); + oC.z = Min(oC.z, pixel.z); + break; + case VK_BLEND_OP_MAX: + oC.x = Max(oC.x, pixel.x); + oC.y = Max(oC.y, pixel.y); + oC.z = Max(oC.z, pixel.z); + break; + case VK_BLEND_OP_SRC_EXT: + // No operation + break; + case VK_BLEND_OP_DST_EXT: + oC.x = pixel.x; + oC.y = pixel.y; + oC.z = pixel.z; + break; + case VK_BLEND_OP_ZERO_EXT: + oC.x = Float4(0.0f); + oC.y = Float4(0.0f); + oC.z = Float4(0.0f); + break; + default: + UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperation)); } blendFactorAlpha(sourceFactor, oC, pixel, state.blendState[index].sourceBlendFactorAlpha); @@ -1981,33 +1982,33 @@ switch(state.blendState[index].blendOperationAlpha) { - case VK_BLEND_OP_ADD: - oC.w += pixel.w; - break; - case VK_BLEND_OP_SUBTRACT: - oC.w -= pixel.w; - break; - case VK_BLEND_OP_REVERSE_SUBTRACT: - pixel.w -= oC.w; - oC.w = pixel.w; - break; - case VK_BLEND_OP_MIN: - oC.w = Min(oC.w, pixel.w); - break; - case VK_BLEND_OP_MAX: - oC.w = Max(oC.w, pixel.w); - break; 
- case VK_BLEND_OP_SRC_EXT: - // No operation - break; - case VK_BLEND_OP_DST_EXT: - oC.w = pixel.w; - break; - case VK_BLEND_OP_ZERO_EXT: - oC.w = Float4(0.0f); - break; - default: - UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha)); + case VK_BLEND_OP_ADD: + oC.w += pixel.w; + break; + case VK_BLEND_OP_SUBTRACT: + oC.w -= pixel.w; + break; + case VK_BLEND_OP_REVERSE_SUBTRACT: + pixel.w -= oC.w; + oC.w = pixel.w; + break; + case VK_BLEND_OP_MIN: + oC.w = Min(oC.w, pixel.w); + break; + case VK_BLEND_OP_MAX: + oC.w = Max(oC.w, pixel.w); + break; + case VK_BLEND_OP_SRC_EXT: + // No operation + break; + case VK_BLEND_OP_DST_EXT: + oC.w = pixel.w; + break; + case VK_BLEND_OP_ZERO_EXT: + oC.w = Float4(0.0f); + break; + default: + UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha)); } } @@ -2015,49 +2016,49 @@ { switch(state.targetFormat[index]) { - case VK_FORMAT_R16_SFLOAT: - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32_UINT: - case VK_FORMAT_R16_SINT: - case VK_FORMAT_R16_UINT: - case VK_FORMAT_R8_SINT: - case VK_FORMAT_R8_UINT: - case VK_FORMAT_A2B10G10R10_UINT_PACK32: - break; - case VK_FORMAT_R16G16_SFLOAT: - case VK_FORMAT_R32G32_SFLOAT: - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32_UINT: - case VK_FORMAT_R16G16_SINT: - case VK_FORMAT_R16G16_UINT: - case VK_FORMAT_R8G8_SINT: - case VK_FORMAT_R8G8_UINT: - oC.z = oC.x; - oC.x = UnpackLow(oC.x, oC.y); - oC.z = UnpackHigh(oC.z, oC.y); - oC.y = oC.z; - break; - case VK_FORMAT_R16G16B16A16_SFLOAT: - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32G32B32A32_UINT: - case VK_FORMAT_R16G16B16A16_SINT: - case VK_FORMAT_R16G16B16A16_UINT: - case VK_FORMAT_R8G8B8A8_SINT: - case VK_FORMAT_R8G8B8A8_UINT: - case VK_FORMAT_A8B8G8R8_UINT_PACK32: - case VK_FORMAT_A8B8G8R8_SINT_PACK32: - transpose4x4(oC.x, oC.y, oC.z, oC.w); - break; - default: - 
UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); + case VK_FORMAT_R16_SFLOAT: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32_UINT: + case VK_FORMAT_R16_SINT: + case VK_FORMAT_R16_UINT: + case VK_FORMAT_R8_SINT: + case VK_FORMAT_R8_UINT: + case VK_FORMAT_A2B10G10R10_UINT_PACK32: + break; + case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R32G32_SFLOAT: + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32_UINT: + case VK_FORMAT_R16G16_SINT: + case VK_FORMAT_R16G16_UINT: + case VK_FORMAT_R8G8_SINT: + case VK_FORMAT_R8G8_UINT: + oC.z = oC.x; + oC.x = UnpackLow(oC.x, oC.y); + oC.z = UnpackHigh(oC.z, oC.y); + oC.y = oC.z; + break; + case VK_FORMAT_R16G16B16A16_SFLOAT: + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32G32B32A32_UINT: + case VK_FORMAT_R16G16B16A16_SINT: + case VK_FORMAT_R16G16B16A16_UINT: + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_A8B8G8R8_UINT_PACK32: + case VK_FORMAT_A8B8G8R8_SINT_PACK32: + transpose4x4(oC.x, oC.y, oC.z, oC.w); + break; + default: + UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); } int rgbaWriteMask = state.colorWriteActive(index); - Int xMask; // Combination of all masks + Int xMask; // Combination of all masks if(state.depthTestActive) { @@ -2080,520 +2081,520 @@ switch(targetFormat) { - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32_UINT: - if(rgbaWriteMask & 0x00000001) - { - buffer += 4 * x; - - // FIXME: movlps - value.x = *Pointer<Float>(buffer + 0); - value.y = *Pointer<Float>(buffer + 4); - - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - - // FIXME: movhps - value.z = *Pointer<Float>(buffer + 0); - value.w = *Pointer<Float>(buffer + 4); - - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & 
*Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); - oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); - - // FIXME: movhps - *Pointer<Float>(buffer + 0) = oC.x.z; - *Pointer<Float>(buffer + 4) = oC.x.w; - - buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - - // FIXME: movlps - *Pointer<Float>(buffer + 0) = oC.x.x; - *Pointer<Float>(buffer + 4) = oC.x.y; - } - break; - case VK_FORMAT_R16_SFLOAT: - if(rgbaWriteMask & 0x00000001) - { - buffer += 2 * x; - - value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0); - value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1); - - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - - value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2); - value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3); - - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); - oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); - - *Pointer<Half>(buffer + 0) = Half(oC.x.z); - *Pointer<Half>(buffer + 2) = Half(oC.x.w); - - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - - *Pointer<Half>(buffer + 0) = Half(oC.x.x); - *Pointer<Half>(buffer + 2) = Half(oC.x.y); - } - break; - case VK_FORMAT_R16_SINT: - case VK_FORMAT_R16_UINT: - if(rgbaWriteMask & 0x00000001) - { - buffer += 2 * x; - - UShort4 xyzw; - xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); - - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - - xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); - value = As<Float4>(Int4(xyzw)); - - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); 
- oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); - - if(targetFormat == VK_FORMAT_R16_SINT) + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32_UINT: + if(rgbaWriteMask & 0x00000001) { - Float component = oC.x.z; - *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); - component = oC.x.w; - *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); + buffer += 4 * x; + + // FIXME: movlps + value.x = *Pointer<Float>(buffer + 0); + value.y = *Pointer<Float>(buffer + 4); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + // FIXME: movhps + value.z = *Pointer<Float>(buffer + 0); + value.w = *Pointer<Float>(buffer + 4); + + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); + + // FIXME: movhps + *Pointer<Float>(buffer + 0) = oC.x.z; + *Pointer<Float>(buffer + 4) = oC.x.w; buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - component = oC.x.x; - *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); - component = oC.x.y; - *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); + // FIXME: movlps + *Pointer<Float>(buffer + 0) = oC.x.x; + *Pointer<Float>(buffer + 4) = oC.x.y; } - else // VK_FORMAT_R16_UINT + break; + case VK_FORMAT_R16_SFLOAT: + if(rgbaWriteMask & 0x00000001) { - Float component = oC.x.z; - *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); - component = oC.x.w; - *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); + buffer += 2 * x; + + value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0); + value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2); + value = Insert(value, 
Float(*Pointer<Half>(buffer + 2)), 3); + + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); + + *Pointer<Half>(buffer + 0) = Half(oC.x.z); + *Pointer<Half>(buffer + 2) = Half(oC.x.w); buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - component = oC.x.x; - *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); - component = oC.x.y; - *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); + *Pointer<Half>(buffer + 0) = Half(oC.x.x); + *Pointer<Half>(buffer + 2) = Half(oC.x.y); } - } - break; - case VK_FORMAT_R8_SINT: - case VK_FORMAT_R8_UINT: - if(rgbaWriteMask & 0x00000001) - { - buffer += x; - - UInt xyzw, packedCol; - - xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; - - Short4 tmpCol = Short4(As<Int4>(oC.x)); - if(targetFormat == VK_FORMAT_R8_SINT) + break; + case VK_FORMAT_R16_SINT: + case VK_FORMAT_R16_UINT: + if(rgbaWriteMask & 0x00000001) { - tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol)); + buffer += 2 * x; + + UShort4 xyzw; + xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); + value = As<Float4>(Int4(xyzw)); + + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); + + if(targetFormat == VK_FORMAT_R16_SINT) + { + Float component = oC.x.z; + *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 
+ component = oC.x.w; + *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); + + buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + component = oC.x.x; + *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); + component = oC.x.y; + *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); + } + else // VK_FORMAT_R16_UINT + { + Float component = oC.x.z; + *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); + component = oC.x.w; + *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); + + buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + component = oC.x.x; + *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); + component = oC.x.y; + *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); + } } - else + break; + case VK_FORMAT_R8_SINT: + case VK_FORMAT_R8_UINT: + if(rgbaWriteMask & 0x00000001) { - tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol)); + buffer += x; + + UInt xyzw, packedCol; + + xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; + + Short4 tmpCol = Short4(As<Int4>(oC.x)); + if(targetFormat == VK_FORMAT_R8_SINT) + { + tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol)); + } + else + { + tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol)); + } + packedCol = Extract(As<Int2>(tmpCol), 0); + + packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) | + (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); + + *Pointer<UShort>(buffer) = UShort(packedCol >> 16); + buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + *Pointer<UShort>(buffer) = UShort(packedCol); } - packedCol = Extract(As<Int2>(tmpCol), 0); + break; + case VK_FORMAT_R32G32_SFLOAT: + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32_UINT: + buffer += 8 * x; - packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, 
maskB4Q) + 8 * xMask)) | - (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); + value = *Pointer<Float4>(buffer); - *Pointer<UShort>(buffer) = UShort(packedCol >> 16); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - *Pointer<UShort>(buffer) = UShort(packedCol); - } - break; - case VK_FORMAT_R32G32_SFLOAT: - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32_UINT: - buffer += 8 * x; - - value = *Pointer<Float4>(buffer); - - if((rgbaWriteMask & 0x00000003) != 0x00000003) - { - Float4 masked = value; - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); - masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); - oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); - } - - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); - oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); - *Pointer<Float4>(buffer) = oC.x; - - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - - value = *Pointer<Float4>(buffer); - - if((rgbaWriteMask & 0x00000003) != 0x00000003) - { - Float4 masked; - - masked = value; - oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); - masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); - oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); - } - - oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); - oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); - *Pointer<Float4>(buffer) = 
oC.y; - break; - case VK_FORMAT_R16G16_SFLOAT: - if((rgbaWriteMask & 0x00000003) != 0x0) - { - buffer += 4 * x; - - UInt2 rgbaMask; - UInt2 packedCol; - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1); - - UShort4 value = *Pointer<UShort4>(buffer); - UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); - if((rgbaWriteMask & 0x3) != 0x3) - { - Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); - rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); - mergedMask &= rgbaMask; - } - *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask); - - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1); - value = *Pointer<UShort4>(buffer); - mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); - if((rgbaWriteMask & 0x3) != 0x3) - { - mergedMask &= rgbaMask; - } - *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask); - } - break; - case VK_FORMAT_R16G16_SINT: - case VK_FORMAT_R16G16_UINT: - if((rgbaWriteMask & 0x00000003) != 0x0) - { - buffer += 4 * x; - - UInt2 rgbaMask; - UShort4 packedCol = UShort4(As<Int4>(oC.x)); - UShort4 value = *Pointer<UShort4>(buffer); - UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); - if((rgbaWriteMask & 0x3) != 0x3) - { - Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); - rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); - mergedMask &= rgbaMask; - } - *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) 
| (As<UInt2>(value) & ~mergedMask); - - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - - packedCol = UShort4(As<Int4>(oC.y)); - value = *Pointer<UShort4>(buffer); - mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); - if((rgbaWriteMask & 0x3) != 0x3) - { - mergedMask &= rgbaMask; - } - *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); - } - break; - case VK_FORMAT_R8G8_SINT: - case VK_FORMAT_R8G8_UINT: - if((rgbaWriteMask & 0x00000003) != 0x0) - { - buffer += 2 * x; - - Int2 xyzw, packedCol; - - xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); - - if(targetFormat == VK_FORMAT_R8G8_SINT) - { - packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); - } - else - { - packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); - } - - UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); - if((rgbaWriteMask & 0x3) != 0x3) - { - Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); - UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); - mergedMask &= rgbaMask; - } - - packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); - - *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); - } - break; - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32G32B32A32_UINT: - buffer += 16 * x; - - { - value = *Pointer<Float4>(buffer, 16); - - if(rgbaWriteMask != 0x0000000F) + if((rgbaWriteMask & 0x00000003) != 0x00000003) { Float4 masked = value; - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + 
OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0]))); + masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0]))); oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); } - oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ01X) + xMask * 16, 16)); oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); - *Pointer<Float4>(buffer, 16) = oC.x; - } + *Pointer<Float4>(buffer) = oC.x; - { - value = *Pointer<Float4>(buffer + 16, 16); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - if(rgbaWriteMask != 0x0000000F) + value = *Pointer<Float4>(buffer); + + if((rgbaWriteMask & 0x00000003) != 0x00000003) { - Float4 masked = value; - oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); + Float4 masked; + + masked = value; + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0]))); + masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0]))); oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); } - oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); - 
value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ23X) + xMask * 16, 16)); oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); - *Pointer<Float4>(buffer + 16, 16) = oC.y; - } - - buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); - - { - value = *Pointer<Float4>(buffer, 16); - - if(rgbaWriteMask != 0x0000000F) + *Pointer<Float4>(buffer) = oC.y; + break; + case VK_FORMAT_R16G16_SFLOAT: + if((rgbaWriteMask & 0x00000003) != 0x0) { - Float4 masked = value; - oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); - oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); + buffer += 4 * x; + + UInt2 rgbaMask; + UInt2 packedCol; + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1); + + UShort4 value = *Pointer<UShort4>(buffer); + UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); + rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) 
| UInt(As<UShort>(Half(oC.y.z))), 1); + value = *Pointer<UShort4>(buffer); + mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + mergedMask &= rgbaMask; + } + *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask); + } + break; + case VK_FORMAT_R16G16_SINT: + case VK_FORMAT_R16G16_UINT: + if((rgbaWriteMask & 0x00000003) != 0x0) + { + buffer += 4 * x; + + UInt2 rgbaMask; + UShort4 packedCol = UShort4(As<Int4>(oC.x)); + UShort4 value = *Pointer<UShort4>(buffer); + UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); + rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + packedCol = UShort4(As<Int4>(oC.y)); + value = *Pointer<UShort4>(buffer); + mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + mergedMask &= rgbaMask; + } + *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); + } + break; + case VK_FORMAT_R8G8_SINT: + case VK_FORMAT_R8G8_UINT: + if((rgbaWriteMask & 0x00000003) != 0x0) + { + buffer += 2 * x; + + Int2 xyzw, packedCol; + + xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); + + if(targetFormat == VK_FORMAT_R8G8_SINT) + { + packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); + } + else + { + packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); + } + + UInt2 mergedMask = *Pointer<UInt2>(constants + 
OFFSET(Constants, maskW4Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); + UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + + packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); + + *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); + buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); + } + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32G32B32A32_UINT: + buffer += 16 * x; + + { + value = *Pointer<Float4>(buffer, 16); + + if(rgbaWriteMask != 0x0000000F) + { + Float4 masked = value; + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0]))); + masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0]))); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); + } + + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskX0X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX0X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); + *Pointer<Float4>(buffer, 16) = oC.x; } - oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); - oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); - *Pointer<Float4>(buffer, 16) = oC.z; - } - - { - value = *Pointer<Float4>(buffer + 16, 16); - - if(rgbaWriteMask != 0x0000000F) { - Float4 masked = value; - oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = 
As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); - oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); - } + value = *Pointer<Float4>(buffer + 16, 16); - oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); - value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); - oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); - *Pointer<Float4>(buffer + 16, 16) = oC.w; - } - break; - case VK_FORMAT_R16G16B16A16_SFLOAT: - if((rgbaWriteMask & 0x0000000F) != 0x0) - { - buffer += 8 * x; + if(rgbaWriteMask != 0x0000000F) + { + Float4 masked = value; + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0]))); + masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0]))); + oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); + } - UInt4 rgbaMask; - UInt4 value = *Pointer<UInt4>(buffer); - UInt4 packedCol; - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3); - UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); - if((rgbaWriteMask & 0xF) != 0xF) - { - UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); - rgbaMask = UInt4(tmpMask, tmpMask); - mergedMask &= rgbaMask; + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskX1X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + 
OFFSET(Constants, invMaskX1X) + xMask * 16, 16)); + oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); + *Pointer<Float4>(buffer + 16, 16) = oC.y; } - *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask); buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = *Pointer<UInt4>(buffer); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2); - packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3); - mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); - if((rgbaWriteMask & 0xF) != 0xF) { - mergedMask &= rgbaMask; + value = *Pointer<Float4>(buffer, 16); + + if(rgbaWriteMask != 0x0000000F) + { + Float4 masked = value; + oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0]))); + masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0]))); + oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); + } + + oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskX2X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX2X) + xMask * 16, 16)); + oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); + *Pointer<Float4>(buffer, 16) = oC.z; } - *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask); - } - break; - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - if((rgbaWriteMask & 0x7) != 0x0) - { - buffer += 4 * x; - unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) | - ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) | - ((rgbaWriteMask & 0x4) ? 
0xFFC00000 : 0); - UInt2 mergedMask(mask, mask); - - UInt2 value; - value = Insert(value, r11g11b10Pack(oC.x), 0); - value = Insert(value, r11g11b10Pack(oC.y), 1); - *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = Insert(value, r11g11b10Pack(oC.z), 0); - value = Insert(value, r11g11b10Pack(oC.w), 1); - *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask); - } - break; - case VK_FORMAT_R16G16B16A16_SINT: - case VK_FORMAT_R16G16B16A16_UINT: - if((rgbaWriteMask & 0x0000000F) != 0x0) - { - buffer += 8 * x; - - UInt4 rgbaMask; - UShort8 value = *Pointer<UShort8>(buffer); - UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))); - UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); - if((rgbaWriteMask & 0xF) != 0xF) { - UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); - rgbaMask = UInt4(tmpMask, tmpMask); - mergedMask &= rgbaMask; + value = *Pointer<Float4>(buffer + 16, 16); + + if(rgbaWriteMask != 0x0000000F) + { + Float4 masked = value; + oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0]))); + masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0]))); + oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); + } + + oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskX3X) + xMask * 16, 16)); + value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX3X) + xMask * 16, 16)); + oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); + *Pointer<Float4>(buffer + 16, 16) = oC.w; } - *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); - - buffer += *Pointer<Int>(data + OFFSET(DrawData, 
colorPitchB[index])); - - value = *Pointer<UShort8>(buffer); - packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); - mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); - if((rgbaWriteMask & 0xF) != 0xF) + break; + case VK_FORMAT_R16G16B16A16_SFLOAT: + if((rgbaWriteMask & 0x0000000F) != 0x0) { - mergedMask &= rgbaMask; - } - *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); - } - break; - case VK_FORMAT_R8G8B8A8_SINT: - case VK_FORMAT_R8G8B8A8_UINT: - case VK_FORMAT_A8B8G8R8_UINT_PACK32: - case VK_FORMAT_A8B8G8R8_SINT_PACK32: - if((rgbaWriteMask & 0x0000000F) != 0x0) - { - UInt2 value, packedCol, mergedMask; + buffer += 8 * x; - buffer += 4 * x; + UInt4 rgbaMask; + UInt4 value = *Pointer<UInt4>(buffer); + UInt4 packedCol; + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3); + UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); + if((rgbaWriteMask & 0xF) != 0xF) + { + UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + rgbaMask = UInt4(tmpMask, tmpMask); + mergedMask &= rgbaMask; + } + *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask); - bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32; + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - if(isSigned) - { - packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); + value = *Pointer<UInt4>(buffer); + packedCol = Insert(packedCol, 
(UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2); + packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3); + mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); + if((rgbaWriteMask & 0xF) != 0xF) + { + mergedMask &= rgbaMask; + } + *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask); } - else + break; + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + if((rgbaWriteMask & 0x7) != 0x0) { - packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); - } - value = *Pointer<UInt2>(buffer, 16); - mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); - if(rgbaWriteMask != 0xF) - { - mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); - } - *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); + buffer += 4 * x; - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) | + ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) | + ((rgbaWriteMask & 0x4) ? 
0xFFC00000 : 0); + UInt2 mergedMask(mask, mask); - if(isSigned) - { - packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); + UInt2 value; + value = Insert(value, r11g11b10Pack(oC.x), 0); + value = Insert(value, r11g11b10Pack(oC.y), 1); + *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + value = Insert(value, r11g11b10Pack(oC.z), 0); + value = Insert(value, r11g11b10Pack(oC.w), 1); + *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask); } - else + break; + case VK_FORMAT_R16G16B16A16_SINT: + case VK_FORMAT_R16G16B16A16_UINT: + if((rgbaWriteMask & 0x0000000F) != 0x0) { - packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); - } - value = *Pointer<UInt2>(buffer, 16); - mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); - if(rgbaWriteMask != 0xF) - { - mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); - } - *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); - } - break; - case VK_FORMAT_A2B10G10R10_UINT_PACK32: - if((rgbaWriteMask & 0x0000000F) != 0x0) - { - Int2 mergedMask, packedCol, value; - Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) | - ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) | - ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) | - ((As<Int4>(oC.x) & Int4(0x3ff))); + buffer += 8 * x; - buffer += 4 * x; - value = *Pointer<Int2>(buffer, 16); - mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); - if(rgbaWriteMask != 0xF) - { - mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0])); - } - *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask); + UInt4 rgbaMask; + UShort8 value = *Pointer<UShort8>(buffer); + UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), 
UShort4(As<Int4>(oC.y))); + UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); + if((rgbaWriteMask & 0xF) != 0xF) + { + UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + rgbaMask = UInt4(tmpMask, tmpMask); + mergedMask &= rgbaMask; + } + *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = *Pointer<Int2>(buffer, 16); - mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); - if(rgbaWriteMask != 0xF) - { - mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0])); + value = *Pointer<UShort8>(buffer); + packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); + mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); + if((rgbaWriteMask & 0xF) != 0xF) + { + mergedMask &= rgbaMask; + } + *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); } - *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask); - } - break; - default: - UNIMPLEMENTED("VkFormat: %d", int(targetFormat)); + break; + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_A8B8G8R8_UINT_PACK32: + case VK_FORMAT_A8B8G8R8_SINT_PACK32: + if((rgbaWriteMask & 0x0000000F) != 0x0) + { + UInt2 value, packedCol, mergedMask; + + buffer += 4 * x; + + bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32; + + if(isSigned) + { + packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); + } + else + { + packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); + } + value = *Pointer<UInt2>(buffer, 16); + mergedMask = 
*Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + if(rgbaWriteMask != 0xF) + { + mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); + } + *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + if(isSigned) + { + packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); + } + else + { + packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); + } + value = *Pointer<UInt2>(buffer, 16); + mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + if(rgbaWriteMask != 0xF) + { + mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); + } + *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); + } + break; + case VK_FORMAT_A2B10G10R10_UINT_PACK32: + if((rgbaWriteMask & 0x0000000F) != 0x0) + { + Int2 mergedMask, packedCol, value; + Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) | + ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) | + ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) | + ((As<Int4>(oC.x) & Int4(0x3ff))); + + buffer += 4 * x; + value = *Pointer<Int2>(buffer, 16); + mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + if(rgbaWriteMask != 0xF) + { + mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0])); + } + *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask); + + buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + + value = *Pointer<Int2>(buffer, 16); + mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + if(rgbaWriteMask != 0xF) + { + mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0])); + } + *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask); + } + break; + default: + 
UNIMPLEMENTED("VkFormat: %d", int(targetFormat)); } } @@ -2604,7 +2605,7 @@ void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c) { - Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16); + Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16); c.x = As<UShort4>(c.x) >> 4; c.y = As<UShort4>(c.y) >> 4; @@ -2637,7 +2638,7 @@ void PixelRoutine::linearToSRGB12_16(Vector4s &c) { - Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16); + Pointer<Byte> LUT = constants + OFFSET(Constants, linearToSRGB12_16); c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); @@ -2655,7 +2656,7 @@ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); } -Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2 +Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2 { Float4 linear = x * x; linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp index 678d780..34722ad 100644 --- a/src/Pipeline/PixelRoutine.hpp +++ b/src/Pipeline/PixelRoutine.hpp
@@ -26,16 +26,16 @@ { public: PixelRoutine(const PixelProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader, - const vk::DescriptorSet::Bindings &descriptorSets); + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets); virtual ~PixelRoutine(); protected: - Float4 z[4]; // Multisampled z - Float4 w; // Used as is - Float4 rhw; // Reciprocal w + Float4 z[4]; // Multisampled z + Float4 w; // Used as is + Float4 rhw; // Reciprocal w SpirvRoutine routine; const vk::DescriptorSet::Bindings &descriptorSets; @@ -43,7 +43,7 @@ // Depth output Float4 oDepth; - virtual void setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) = 0; + virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4]) = 0; virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) = 0; virtual Bool alphaTest(Int cMask[4]) = 0; virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0; @@ -55,7 +55,7 @@ // Raster operations void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x); - void writeColor(int index, const Pointer<Byte> &cBuffer, const Int& x, Vector4f& oC, const Int& sMask, const Int& zMask, const Int& cMask); + void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &oC, const Int &sMask, const Int &zMask, const Int &cMask); void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4f &oC, const Int &x); void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask); @@ -93,4 +93,4 @@ } // namespace sw -#endif // sw_PixelRoutine_hpp +#endif // sw_PixelRoutine_hpp
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp index b2c2268..10d848e 100644 --- a/src/Pipeline/SamplerCore.cpp +++ b/src/Pipeline/SamplerCore.cpp
@@ -14,35 +14,35 @@ #include "SamplerCore.hpp" -#include "PixelRoutine.hpp" #include "Constants.hpp" -#include "Vulkan/VkSampler.hpp" +#include "PixelRoutine.hpp" #include "Vulkan/VkDebug.hpp" +#include "Vulkan/VkSampler.hpp" #include <limits> namespace { -void applySwizzle(VkComponentSwizzle swizzle, sw::Float4& f, const sw::Vector4f& c, bool integer) +void applySwizzle(VkComponentSwizzle swizzle, sw::Float4 &f, const sw::Vector4f &c, bool integer) { switch(swizzle) { - case VK_COMPONENT_SWIZZLE_R: f = c.x; break; - case VK_COMPONENT_SWIZZLE_G: f = c.y; break; - case VK_COMPONENT_SWIZZLE_B: f = c.z; break; - case VK_COMPONENT_SWIZZLE_A: f = c.w; break; - case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break; - case VK_COMPONENT_SWIZZLE_ONE: - if(integer) - { - f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1)); - } - else - { - f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); - } - break; - default: ASSERT(false); + case VK_COMPONENT_SWIZZLE_R: f = c.x; break; + case VK_COMPONENT_SWIZZLE_G: f = c.y; break; + case VK_COMPONENT_SWIZZLE_B: f = c.z; break; + case VK_COMPONENT_SWIZZLE_A: f = c.w; break; + case VK_COMPONENT_SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break; + case VK_COMPONENT_SWIZZLE_ONE: + if(integer) + { + f = rr::As<sw::Float4>(sw::Int4(1, 1, 1, 1)); + } + else + { + f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); + } + break; + default: ASSERT(false); } } @@ -50,11 +50,13 @@ namespace sw { -SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state) : constants(constants), state(state) +SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state) + : constants(constants) + , state(state) { } -Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4& sampleId, SamplerFunction function) +Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, 
Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4 &sampleId, SamplerFunction function) { Vector4f c; @@ -119,7 +121,8 @@ { lod = Float(0); } - else UNREACHABLE("Sampler function %d", int(function)); + else + UNREACHABLE("Sampler function %d", int(function)); if(function != Base && function != Fetch && function != Gather) { @@ -139,7 +142,7 @@ } c.x = lod; - // c.y contains unclamped LOD. + // c.y contains unclamped LOD. return c; } @@ -159,31 +162,85 @@ { switch(state.textureFormat) { + case VK_FORMAT_R5G6B5_UNORM_PACK16: + c.x *= Float4(1.0f / 0xF800); + c.y *= Float4(1.0f / 0xFC00); + c.z *= Float4(1.0f / 0xF800); + break; + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: + c.x *= Float4(1.0f / 0xF000); + c.y *= Float4(1.0f / 0xF000); + c.z *= Float4(1.0f / 0xF000); + c.w *= Float4(1.0f / 0xF000); + break; + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + c.x *= Float4(1.0f / 0xF800); + c.y *= Float4(1.0f / 0xF800); + c.z *= Float4(1.0f / 0xF800); + c.w *= Float4(1.0f / 0x8000); + break; + case VK_FORMAT_R8_SNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_A8B8G8R8_SNORM_PACK32: + c.x *= Float4(1.0f / 0x7F00); + c.y *= Float4(1.0f / 0x7F00); + c.z *= Float4(1.0f / 0x7F00); + c.w *= Float4(1.0f / 0x7F00); + break; + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_B8G8R8A8_SRGB: + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_R8_SRGB: + case VK_FORMAT_R8G8_SRGB: + c.x *= Float4(1.0f / 0xFF00u); + c.y *= Float4(1.0f / 0xFF00u); + c.z *= Float4(1.0f / 0xFF00u); + c.w *= Float4(1.0f / 0xFF00u); + break; + default: + for(int component = 0; component < textureComponentCount(); component++) + { + c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF); + } + } + } + } + else // 16-bit filtering. 
+ { + Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, function); + + switch(state.textureFormat) + { case VK_FORMAT_R5G6B5_UNORM_PACK16: - c.x *= Float4(1.0f / 0xF800); - c.y *= Float4(1.0f / 0xFC00); - c.z *= Float4(1.0f / 0xF800); + c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); + c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00); + c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); break; case VK_FORMAT_B4G4R4A4_UNORM_PACK16: - c.x *= Float4(1.0f / 0xF000); - c.y *= Float4(1.0f / 0xF000); - c.z *= Float4(1.0f / 0xF000); - c.w *= Float4(1.0f / 0xF000); + c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000); + c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000); + c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000); + c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000); break; case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - c.x *= Float4(1.0f / 0xF800); - c.y *= Float4(1.0f / 0xF800); - c.z *= Float4(1.0f / 0xF800); - c.w *= Float4(1.0f / 0x8000); + c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); + c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800); + c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); + c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000); break; case VK_FORMAT_R8_SNORM: case VK_FORMAT_R8G8_SNORM: case VK_FORMAT_R8G8B8A8_SNORM: case VK_FORMAT_A8B8G8R8_SNORM_PACK32: - c.x *= Float4(1.0f / 0x7F00); - c.y *= Float4(1.0f / 0x7F00); - c.z *= Float4(1.0f / 0x7F00); - c.w *= Float4(1.0f / 0x7F00); + c.x = Float4(cs.x) * Float4(1.0f / 0x7F00); + c.y = Float4(cs.y) * Float4(1.0f / 0x7F00); + c.z = Float4(cs.z) * Float4(1.0f / 0x7F00); + c.w = Float4(cs.w) * Float4(1.0f / 0x7F00); break; case VK_FORMAT_R8_UNORM: case VK_FORMAT_R8G8_UNORM: @@ -194,77 +251,23 @@ case VK_FORMAT_R8G8B8A8_SRGB: case VK_FORMAT_R8_SRGB: case VK_FORMAT_R8G8_SRGB: - c.x *= Float4(1.0f / 0xFF00u); - c.y *= Float4(1.0f / 0xFF00u); - c.z *= Float4(1.0f / 
0xFF00u); - c.w *= Float4(1.0f / 0xFF00u); + c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u); + c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u); + c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u); + c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u); break; default: for(int component = 0; component < textureComponentCount(); component++) { - c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF); + if(hasUnsignedTextureComponent(component)) + { + convertUnsigned16(c[component], cs[component]); + } + else + { + convertSigned15(c[component], cs[component]); + } } - } - } - } - else // 16-bit filtering. - { - Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, function); - - switch(state.textureFormat) - { - case VK_FORMAT_R5G6B5_UNORM_PACK16: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); - break; - case VK_FORMAT_B4G4R4A4_UNORM_PACK16: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000); - c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000); - break; - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); - c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000); - break; - case VK_FORMAT_R8_SNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_R8G8B8A8_SNORM: - case VK_FORMAT_A8B8G8R8_SNORM_PACK32: - c.x = Float4(cs.x) * Float4(1.0f / 0x7F00); - c.y = Float4(cs.y) * Float4(1.0f / 0x7F00); - c.z = Float4(cs.z) * Float4(1.0f / 0x7F00); - c.w = Float4(cs.w) * Float4(1.0f / 0x7F00); - break; - case VK_FORMAT_R8_UNORM: - 
case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_R8_SRGB: - case VK_FORMAT_R8G8_SRGB: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u); - c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u); - break; - default: - for(int component = 0; component < textureComponentCount(); component++) - { - if(hasUnsignedTextureComponent(component)) - { - convertUnsigned16(c[component], cs[component]); - } - else - { - convertSigned15(c[component], cs[component]); - } - } } } @@ -321,27 +324,27 @@ { switch(count) { - case -1: return uvw - offset; - case 0: return uvw; - case +1: return uvw + offset; - case 2: return uvw + offset + offset; + case -1: return uvw - offset; + case 0: return uvw; + case +1: return uvw + offset; + case 2: return uvw + offset + offset; } } - else // Clamp or mirror + else // Clamp or mirror { switch(count) { - case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset)); - case 0: return uvw; - case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset)); - case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset)); + case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset)); + case 0: return uvw; + case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset)); + case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset)); } } return uvw; } -Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function) +Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f 
&offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function) { Vector4s c = sampleAniso(texture, u, v, w, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, false, function); @@ -356,21 +359,45 @@ lod *= Float(1 << 16); - UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize - Short4 stri = utri >> 1; // FIXME: Optimize + UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize + Short4 stri = utri >> 1; // FIXME: Optimize - if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri); - if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri); - if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri); - if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri); + if(hasUnsignedTextureComponent(0)) + cc.x = MulHigh(As<UShort4>(cc.x), utri); + else + cc.x = MulHigh(cc.x, stri); + if(hasUnsignedTextureComponent(1)) + cc.y = MulHigh(As<UShort4>(cc.y), utri); + else + cc.y = MulHigh(cc.y, stri); + if(hasUnsignedTextureComponent(2)) + cc.z = MulHigh(As<UShort4>(cc.z), utri); + else + cc.z = MulHigh(cc.z, stri); + if(hasUnsignedTextureComponent(3)) + cc.w = MulHigh(As<UShort4>(cc.w), utri); + else + cc.w = MulHigh(cc.w, stri); utri = ~utri; stri = Short4(0x7FFF) - stri; - if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri); - if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri); - if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri); - if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri); + if(hasUnsignedTextureComponent(0)) + c.x = MulHigh(As<UShort4>(c.x), utri); + 
else + c.x = MulHigh(c.x, stri); + if(hasUnsignedTextureComponent(1)) + c.y = MulHigh(As<UShort4>(c.y), utri); + else + c.y = MulHigh(c.y, stri); + if(hasUnsignedTextureComponent(2)) + c.z = MulHigh(As<UShort4>(c.z), utri); + else + c.z = MulHigh(c.z, stri); + if(hasUnsignedTextureComponent(3)) + c.w = MulHigh(As<UShort4>(c.w), utri); + else + c.w = MulHigh(c.w, stri); c.x += cc.x; c.y += cc.y; @@ -386,7 +413,7 @@ return c; } -Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function) +Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function) { Vector4s c; @@ -405,9 +432,9 @@ cSum.z = Short4(0); cSum.w = Short4(0); - Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); - Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); - UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a); + Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * a); + Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * a); + UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants, cWeight) + 8 * a); Short4 sw = Short4(cw >> 1); Float4 du = uDelta; @@ -428,25 +455,49 @@ u0 += du; v0 += dv; - if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw); - if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw); - if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw); - 
if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw); + if(hasUnsignedTextureComponent(0)) + cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); + else + cSum.x += MulHigh(c.x, sw); + if(hasUnsignedTextureComponent(1)) + cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); + else + cSum.y += MulHigh(c.y, sw); + if(hasUnsignedTextureComponent(2)) + cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); + else + cSum.z += MulHigh(c.z, sw); + if(hasUnsignedTextureComponent(3)) + cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); + else + cSum.w += MulHigh(c.w, sw); i++; } Until(i >= a); - if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x); - if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y); - if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z); - if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w); + if(hasUnsignedTextureComponent(0)) + c.x = cSum.x; + else + c.x = AddSat(cSum.x, cSum.x); + if(hasUnsignedTextureComponent(1)) + c.y = cSum.y; + else + c.y = AddSat(cSum.y, cSum.y); + if(hasUnsignedTextureComponent(2)) + c.z = cSum.z; + else + c.z = AddSat(cSum.z, cSum.z); + if(hasUnsignedTextureComponent(3)) + c.w = cSum.w; + else + c.w = AddSat(cSum.w, cSum.w); } return c; } -Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function) +Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function) { if(state.textureType != VK_IMAGE_VIEW_TYPE_3D) { @@ -458,7 +509,7 @@ } } -Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f 
&offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function) +Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function) { Vector4s c; @@ -487,21 +538,21 @@ } else { - Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod); - Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod); - Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod); - Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod); + Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod); + Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod); + Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod); + Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod); Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function); Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function); Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function); Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, cubeArrayId, sampleId, buffer, function); - if(!gather) // Blend + if(!gather) // Blend { // Fractions - UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,width))); - UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,height))); + 
UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width))); + UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height))); UShort4 f1u = ~f0u; UShort4 f1v = ~f0v; @@ -532,7 +583,7 @@ { c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u); c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u); - c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v); + c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v); } else { @@ -552,7 +603,7 @@ } c.x = (c00.x + c10.x) + (c01.x + c11.x); - if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions + if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions } } @@ -562,7 +613,7 @@ { c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u); c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u); - c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v); + c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v); } else { @@ -582,7 +633,7 @@ } c.y = (c00.y + c10.y) + (c01.y + c11.y); - if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions + if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions } } @@ -592,7 +643,7 @@ { c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u); c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u); - c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v); + c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v); } 
else { @@ -612,7 +663,7 @@ } c.z = (c00.z + c10.z) + (c01.z + c11.z); - if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions + if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions } } @@ -622,7 +673,7 @@ { c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u); c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u); - c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v); + c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v); } else { @@ -642,7 +693,7 @@ } c.w = (c00.w + c10.w) + (c01.w + c11.w); - if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions + if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions } } } @@ -651,16 +702,16 @@ VkComponentSwizzle swizzle = gatherSwizzle(); switch(swizzle) { - case VK_COMPONENT_SWIZZLE_ZERO: - case VK_COMPONENT_SWIZZLE_ONE: - // Handled at the final component swizzle. - break; - default: - c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R]; - break; + case VK_COMPONENT_SWIZZLE_ZERO: + case VK_COMPONENT_SWIZZLE_ONE: + // Handled at the final component swizzle. 
+ break; + default: + c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R]; + break; } } } @@ -668,7 +719,7 @@ return c; } -Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function) +Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function) { Vector4s c_; @@ -708,17 +759,17 @@ { for(int k = 0; k < 2; k++) { - u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod); - v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod); - s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod); + u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod); + v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod); + s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap, wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod); } } } // Fractions - UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,width))); - UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,height))); - UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap,depth))); + UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width))); + UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, 
height))); + UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, depth))); UShort4 f1u = ~f0u; UShort4 f1v = ~f0v; @@ -766,10 +817,34 @@ { c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, cubeArrayId, sampleId, buffer, function); - if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); } - if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); } - if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); } - if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); } + if(componentCount >= 1) + { + if(hasUnsignedTextureComponent(0)) + c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); + else + c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); + } + if(componentCount >= 2) + { + if(hasUnsignedTextureComponent(1)) + c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); + else + c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); + } + if(componentCount >= 3) + { + if(hasUnsignedTextureComponent(2)) + c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); + else + c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); + } + if(componentCount >= 4) + { + if(hasUnsignedTextureComponent(3)) + c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); + else + c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - 
k]); + } if(i != 0 || j != 0 || k != 0) { @@ -788,16 +863,20 @@ if(componentCount >= 4) c_.w = c[0][0][0].w; // Correct for signed fractions - if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x); - if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y); - if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z); - if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w); + if(componentCount >= 1) + if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x); + if(componentCount >= 2) + if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y); + if(componentCount >= 3) + if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z); + if(componentCount >= 4) + if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w); } return c_; } -Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function) +Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function) { Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, cubeArrayCoord, sampleId, lod, anisotropy, uDelta, vDelta, false, function); @@ -821,7 +900,7 @@ return c; } -Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function) +Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const 
Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function) { Vector4f c; @@ -840,8 +919,8 @@ cSum.z = Float4(0.0f); cSum.w = Float4(0.0f); - Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); - Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); + Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * a); + Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * a); Float4 du = uDelta; Float4 dv = vDelta; @@ -879,7 +958,7 @@ return c; } -Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function) +Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function) { if(state.textureType != VK_IMAGE_VIEW_TYPE_3D) { @@ -891,7 +970,7 @@ } } -Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function) +Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function) { Vector4f c; @@ -935,7 +1014,7 @@ Vector4f c01 = sampleTexel(x0, y1, z0, q, mipmap, cubeArrayId, sampleId, buffer, function); Vector4f c11 = sampleTexel(x1, y1, z0, q, mipmap, cubeArrayId, sampleId, buffer, function); - if(!gather) // Blend + if(!gather) // Blend { if(componentCount >= 1) c00.x = c00.x + fu * (c10.x - c00.x); if(componentCount >= 2) c00.y = c00.y + fu * (c10.y - 
c00.y); @@ -957,16 +1036,16 @@ VkComponentSwizzle swizzle = gatherSwizzle(); switch(swizzle) { - case VK_COMPONENT_SWIZZLE_ZERO: - case VK_COMPONENT_SWIZZLE_ONE: - // Handled at the final component swizzle. - break; - default: - c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R]; - c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R]; - break; + case VK_COMPONENT_SWIZZLE_ZERO: + case VK_COMPONENT_SWIZZLE_ONE: + // Handled at the final component swizzle. + break; + default: + c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R]; + c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R]; + break; } } } @@ -974,7 +1053,7 @@ return c; } -Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function) +Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function) { Vector4f c; @@ -1065,18 +1144,18 @@ Float SamplerCore::log2sqrt(Float lod) { // log2(sqrt(lod)) // Equals 0.25 * log2(lod^2). - lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. - lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. - lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length). + lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. + lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. + lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length). 
return lod; } Float SamplerCore::log2(Float lod) { - lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. - lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. - lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length). + lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. + lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. + lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length). return lod; } @@ -1085,7 +1164,7 @@ { Float4 duvdxy; - if(function != Grad) // Implicit + if(function != Grad) // Implicit { duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx); } @@ -1103,7 +1182,7 @@ Float4 dUV2dxy = dUVdxy * dUVdxy; Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw; - lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis + lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis if(state.textureFilter == FILTER_ANISOTROPIC) { @@ -1119,12 +1198,12 @@ vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask))); anisotropy = lod * Rcp_pp(det); - anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler,maxAnisotropy))); + anisotropy = Min(anisotropy, *Pointer<Float>(sampler + OFFSET(vk::Sampler, maxAnisotropy))); lod *= Rcp_pp(anisotropy * anisotropy); } - lod = log2sqrt(lod); // log2(sqrt(lod)) + lod = log2sqrt(lod); // log2(sqrt(lod)) } void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function) @@ -1160,10 +1239,10 @@ dudxy = Max(Max(duvdxy, dusdxy), dvsdxy); - lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z); + lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z); // Scale by texture dimension. 
- lod *= *Pointer<Float>(texture + OFFSET(Texture,width)); + lod *= *Pointer<Float>(texture + OFFSET(Texture, width)); lod = log2(lod); } @@ -1172,7 +1251,7 @@ { Float4 dudxy, dvdxy, dsdxy; - if(function != Grad) // Implicit + if(function != Grad) // Implicit { dudxy = uuuu - uuuu.xxxx; dvdxy = vvvv - vvvv.xxxx; @@ -1197,9 +1276,9 @@ dudxy += dvdxy; dudxy += dsdxy; - lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z); + lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z); - lod = log2sqrt(lod); // log2(sqrt(lod)) + lod = log2sqrt(lod); // log2(sqrt(lod)) } Int4 SamplerCore::cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M) @@ -1207,20 +1286,20 @@ // TODO: Comply with Vulkan recommendation: // Vulkan 1.1: "The rules should have as the first rule that rz wins over ry and rx, and the second rule that ry wins over rx." - Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0 - Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0 - Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0 + Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0 + Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0 + Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0 Float4 absX = Abs(x); Float4 absY = Abs(y); Float4 absZ = Abs(z); - Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y) - Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z) - Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x) - Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z) - Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x) - Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y) + Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y) + Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z) + Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x) + Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z) + Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x) + Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y) // FACE_POSITIVE_X = 000b // FACE_NEGATIVE_X = 001b @@ -1235,14 +1314,14 
@@ Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000); Int negative = SignMask(n); - Int faces = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4); - faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4); - faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4); + Int faces = *Pointer<Int>(constants + OFFSET(Constants, transposeBit0) + negative * 4); + faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit1) + yAxis * 4); + faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit2) + zAxis * 4); Int4 face; face.x = faces & 0x7; - face.y = (faces >> 4) & 0x7; - face.z = (faces >> 8) & 0x7; + face.y = (faces >> 4) & 0x7; + face.z = (faces >> 8) & 0x7; face.w = (faces >> 12) & 0x7; M = Max(Max(absX, absY), Max(absZ, Float4(std::numeric_limits<float>::min()))); @@ -1267,27 +1346,27 @@ switch(mode) { - case AddressingMode::ADDRESSING_WRAP: - tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd; - break; - case AddressingMode::ADDRESSING_CLAMP: - case AddressingMode::ADDRESSING_MIRROR: - case AddressingMode::ADDRESSING_MIRRORONCE: - case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER - tmp = Min(Max(tmp, Int4(0)), whd - Int4(1)); - break; - case ADDRESSING_TEXELFETCH: - break; - case AddressingMode::ADDRESSING_SEAMLESS: - ASSERT(false); // Cube sampling doesn't support offset. 
- default: - ASSERT(false); + case AddressingMode::ADDRESSING_WRAP: + tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd; + break; + case AddressingMode::ADDRESSING_CLAMP: + case AddressingMode::ADDRESSING_MIRROR: + case AddressingMode::ADDRESSING_MIRRORONCE: + case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER + tmp = Min(Max(tmp, Int4(0)), whd - Int4(1)); + break; + case ADDRESSING_TEXELFETCH: + break; + case AddressingMode::ADDRESSING_SEAMLESS: + ASSERT(false); // Cube sampling doesn't support offset. + default: + ASSERT(false); } return As<Short4>(UShort4(tmp)); } -void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, SamplerFunction function) +void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function) { bool texelFetch = (function == Fetch); bool hasOffset = (function.offset != 0); @@ -1309,8 +1388,8 @@ Short4 uuu2 = uuuu; uuuu = As<Short4>(UnpackLow(uuuu, vvvv)); uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv)); - uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); - uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); + uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP)))); + uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP)))); if(hasThirdCoordinate()) { @@ -1380,7 +1459,7 @@ } } -void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, SamplerFunction function) +void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> 
&mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function) { UInt4 indices = uuuu + vvvv; @@ -1426,32 +1505,32 @@ switch(state.textureFormat) { - case VK_FORMAT_R5G6B5_UNORM_PACK16: - c.z = (c.x & Short4(0x001Fu)) << 11; - c.y = (c.x & Short4(0x07E0u)) << 5; - c.x = (c.x & Short4(0xF800u)); - break; - case VK_FORMAT_B4G4R4A4_UNORM_PACK16: - c.w = (c.x << 12) & Short4(0xF000u); - c.z = (c.x) & Short4(0xF000u); - c.y = (c.x << 4) & Short4(0xF000u); - c.x = (c.x << 8) & Short4(0xF000u); - break; - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - c.w = (c.x) & Short4(0x8000u); - c.z = (c.x << 11) & Short4(0xF800u); - c.y = (c.x << 6) & Short4(0xF800u); - c.x = (c.x << 1) & Short4(0xF800u); - break; - default: - ASSERT(false); + case VK_FORMAT_R5G6B5_UNORM_PACK16: + c.z = (c.x & Short4(0x001Fu)) << 11; + c.y = (c.x & Short4(0x07E0u)) << 5; + c.x = (c.x & Short4(0xF800u)); + break; + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: + c.w = (c.x << 12) & Short4(0xF000u); + c.z = (c.x) & Short4(0xF000u); + c.y = (c.x << 4) & Short4(0xF000u); + c.x = (c.x << 8) & Short4(0xF000u); + break; + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + c.w = (c.x) & Short4(0x8000u); + c.z = (c.x << 11) & Short4(0xF800u); + c.y = (c.x << 6) & Short4(0xF800u); + c.x = (c.x << 1) & Short4(0xF800u); + break; + default: + ASSERT(false); } } else if(has8bitTextureComponents()) { switch(textureComponentCount()) { - case 4: + case 4: { Byte4 c0 = Pointer<Byte4>(buffer)[index[0]]; Byte4 c1 = Pointer<Byte4>(buffer)[index[1]]; @@ -1462,80 +1541,80 @@ switch(state.textureFormat) { - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - c.z = As<Short4>(UnpackLow(c.x, c.y)); - c.x = As<Short4>(UnpackHigh(c.x, c.y)); - c.y = c.z; - c.w = c.x; - c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z)); - c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y)); - c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x)); - c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w)); - break; - case 
VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SINT: - case VK_FORMAT_R8G8B8A8_SNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - c.z = As<Short4>(UnpackHigh(c.x, c.y)); - c.x = As<Short4>(UnpackLow(c.x, c.y)); - c.y = c.x; - c.w = c.z; - c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x)); - c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y)); - c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z)); - c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w)); - // Propagate sign bit - if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT) - { - c.x >>= 8; - c.y >>= 8; - c.z >>= 8; - c.w >>= 8; - } - break; - case VK_FORMAT_R8G8B8A8_UINT: - c.z = As<Short4>(UnpackHigh(c.x, c.y)); - c.x = As<Short4>(UnpackLow(c.x, c.y)); - c.y = c.x; - c.w = c.z; - c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); - c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); - c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); - c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0))); - break; - default: - ASSERT(false); + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_B8G8R8A8_SRGB: + c.z = As<Short4>(UnpackLow(c.x, c.y)); + c.x = As<Short4>(UnpackHigh(c.x, c.y)); + c.y = c.z; + c.w = c.x; + c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z)); + c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y)); + c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x)); + c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w)); + break; + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_R8G8B8A8_SRGB: + c.z = As<Short4>(UnpackHigh(c.x, c.y)); + c.x = As<Short4>(UnpackLow(c.x, c.y)); + c.y = c.x; + c.w = c.z; + c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x)); + c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y)); + c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z)); + c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w)); + // Propagate sign bit + if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT) + { + c.x >>= 8; + c.y 
>>= 8; + c.z >>= 8; + c.w >>= 8; + } + break; + case VK_FORMAT_R8G8B8A8_UINT: + c.z = As<Short4>(UnpackHigh(c.x, c.y)); + c.x = As<Short4>(UnpackLow(c.x, c.y)); + c.y = c.x; + c.w = c.z; + c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); + c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); + c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); + c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0))); + break; + default: + ASSERT(false); } } break; - case 2: - c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0); - c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1); - c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2); - c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3); + case 2: + c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0); + c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1); + c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2); + c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3); - switch(state.textureFormat) - { - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_R8G8_SRGB: - c.y = (c.x & Short4(0xFF00u)); - c.x = (c.x << 8); + switch(state.textureFormat) + { + case VK_FORMAT_R8G8_UNORM: + case VK_FORMAT_R8G8_SNORM: + case VK_FORMAT_R8G8_SRGB: + c.y = (c.x & Short4(0xFF00u)); + c.x = (c.x << 8); + break; + case VK_FORMAT_R8G8_SINT: + c.y = c.x >> 8; + c.x = (c.x << 8) >> 8; // Propagate sign bit + break; + case VK_FORMAT_R8G8_UINT: + c.y = As<Short4>(As<UShort4>(c.x) >> 8); + c.x &= Short4(0x00FFu); + break; + default: + ASSERT(false); + } break; - case VK_FORMAT_R8G8_SINT: - c.y = c.x >> 8; - c.x = (c.x << 8) >> 8; // Propagate sign bit - break; - case VK_FORMAT_R8G8_UINT: - c.y = As<Short4>(As<UShort4>(c.x) >> 8); - c.x &= Short4(0x00FFu); - break; - default: - ASSERT(false); - } - break; - case 1: + case 1: { Int c0 = Int(*Pointer<Byte>(buffer + index[0])); Int c1 = Int(*Pointer<Byte>(buffer + index[1])); @@ -1545,9 +1624,9 @@ switch(state.textureFormat) { - case VK_FORMAT_R8_SINT: - 
case VK_FORMAT_R8_UINT: - case VK_FORMAT_S8_UINT: + case VK_FORMAT_R8_SINT: + case VK_FORMAT_R8_UINT: + case VK_FORMAT_S8_UINT: { Int zero(0); c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero)); @@ -1558,58 +1637,58 @@ } } break; - case VK_FORMAT_R8_SNORM: - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R8_SRGB: - // TODO: avoid populating the low bits at all. - c.x = Unpack(As<Byte4>(c0)); - c.x &= Short4(0xFF00u); - break; - default: - c.x = Unpack(As<Byte4>(c0)); - break; + case VK_FORMAT_R8_SNORM: + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R8_SRGB: + // TODO: avoid populating the low bits at all. + c.x = Unpack(As<Byte4>(c0)); + c.x &= Short4(0xFF00u); + break; + default: + c.x = Unpack(As<Byte4>(c0)); + break; } } break; - default: - ASSERT(false); + default: + ASSERT(false); } } else if(has16bitTextureComponents()) { switch(textureComponentCount()) { - case 4: - c.x = Pointer<Short4>(buffer)[index[0]]; - c.y = Pointer<Short4>(buffer)[index[1]]; - c.z = Pointer<Short4>(buffer)[index[2]]; - c.w = Pointer<Short4>(buffer)[index[3]]; - transpose4x4(c.x, c.y, c.z, c.w); - break; - case 3: - c.x = Pointer<Short4>(buffer)[index[0]]; - c.y = Pointer<Short4>(buffer)[index[1]]; - c.z = Pointer<Short4>(buffer)[index[2]]; - c.w = Pointer<Short4>(buffer)[index[3]]; - transpose4x3(c.x, c.y, c.z, c.w); - break; - case 2: - c.x = *Pointer<Short4>(buffer + 4 * index[0]); - c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1]))); - c.z = *Pointer<Short4>(buffer + 4 * index[2]); - c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3]))); - c.y = c.x; - c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z)); - c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z)); - break; - case 1: - c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0); - c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1); - c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2); - c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3); - break; - default: - ASSERT(false); + case 4: + c.x = 
Pointer<Short4>(buffer)[index[0]]; + c.y = Pointer<Short4>(buffer)[index[1]]; + c.z = Pointer<Short4>(buffer)[index[2]]; + c.w = Pointer<Short4>(buffer)[index[3]]; + transpose4x4(c.x, c.y, c.z, c.w); + break; + case 3: + c.x = Pointer<Short4>(buffer)[index[0]]; + c.y = Pointer<Short4>(buffer)[index[1]]; + c.z = Pointer<Short4>(buffer)[index[2]]; + c.w = Pointer<Short4>(buffer)[index[3]]; + transpose4x3(c.x, c.y, c.z, c.w); + break; + case 2: + c.x = *Pointer<Short4>(buffer + 4 * index[0]); + c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1]))); + c.z = *Pointer<Short4>(buffer + 4 * index[2]); + c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3]))); + c.y = c.x; + c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z)); + c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z)); + break; + case 1: + c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0); + c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1); + c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2); + c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3); + break; + default: + ASSERT(false); } } else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32) @@ -1639,12 +1718,13 @@ cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2); cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3); - c.x = Short4(((cc) & Int4(0x3FF))); + c.x = Short4(((cc)&Int4(0x3FF))); c.y = Short4(((cc >> 10) & Int4(0x3FF))); c.z = Short4(((cc >> 20) & Int4(0x3FF))); c.w = Short4(((cc >> 30) & Int4(0x3))); } - else ASSERT(false); + else + ASSERT(false); if(state.textureFormat.isSRGBformat()) { @@ -1660,7 +1740,7 @@ return c; } -Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function) +Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, 
Pointer<Byte> buffer, SamplerFunction function) { Vector4s c; @@ -1670,7 +1750,7 @@ if(isYcbcrFormat()) { // Pointers to the planes of YCbCr images are stored in consecutive mipmap levels. - Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); + Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats. Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); @@ -1715,7 +1795,8 @@ U = (UV & Short4(0x00FFu)) | (UV << 8); V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8); } - else UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); + else + UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat); if(!state.swappedChroma) { @@ -1733,19 +1814,19 @@ { // YCbCr formats are treated as signed 15-bit. c.x = Cr >> 1; - c.y = Y >> 1; + c.y = Y >> 1; c.z = Cb >> 1; } else { // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240] // Scale down by 0x0101 to normalize the 8.8 samples, and up by 0x7FFF for signed 15-bit output. - float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0); + float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0); float uvOffset = static_cast<float>(128 * 0x0101); - float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101); + float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101); float uvFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 
224 * 0x0101 : 255 * 0x0101); - Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor); + Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor); Float4 u = (Float4(Cb) - Float4(uvOffset)) * Float4(uvFactor); Float4 v = (Float4(Cr) - Float4(uvOffset)) * Float4(uvFactor); @@ -1767,20 +1848,20 @@ switch(state.ycbcrModel) { - case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: - Kb = 0.0722f; - Kr = 0.2126f; - break; - case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: - Kb = 0.114f; - Kr = 0.299f; - break; - case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: - Kb = 0.0593f; - Kr = 0.2627f; - break; - default: - UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel)); + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: + Kb = 0.0722f; + Kr = 0.2126f; + break; + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: + Kb = 0.114f; + Kr = 0.299f; + break; + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: + Kb = 0.0593f; + Kr = 0.2627f; + break; + default: + UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel)); } const float Kg = 1.0f - Kr - Kb; @@ -1790,9 +1871,9 @@ const float Gr = -2 * Kr * (1 - Kr) / Kg; const float Bb = 2 * (1 - Kb); - Float4 r = y + Float4(Rr) * v; + Float4 r = y + Float4(Rr) * v; Float4 g = y + Float4(Gb) * u + Float4(Gr) * v; - Float4 b = y + Float4(Bb) * u ; + Float4 b = y + Float4(Bb) * u; c.x = Short4(r); c.y = Short4(g); @@ -1808,7 +1889,7 @@ return c; } -Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function) +Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function) { Int4 valid; @@ -1832,111 +1913,111 @@ { switch(state.textureFormat) { - case VK_FORMAT_R16_SFLOAT: - t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2)); - t1 = Int4(*Pointer<UShort4>(buffer + 
index[1] * 2)); - t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2)); - t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2)); + case VK_FORMAT_R16_SFLOAT: + t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2)); + t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2)); + t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2)); + t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2)); - c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0); - c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0); - c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0); - c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0); - break; - case VK_FORMAT_R16G16_SFLOAT: - t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4)); - t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4)); - t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4)); - t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4)); + c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0); + c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0); + c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0); + c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0); + break; + case VK_FORMAT_R16G16_SFLOAT: + t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4)); + t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4)); + t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4)); + t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4)); - // FIXME: shuffles - c.x = As<Float4>(halfToFloatBits(t0)); - c.y = As<Float4>(halfToFloatBits(t1)); - c.z = As<Float4>(halfToFloatBits(t2)); - c.w = As<Float4>(halfToFloatBits(t3)); - transpose4x4(c.x, c.y, c.z, c.w); - break; - case VK_FORMAT_R16G16B16A16_SFLOAT: - t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8)); - t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8)); - t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8)); - t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8)); + // FIXME: shuffles + c.x = As<Float4>(halfToFloatBits(t0)); + c.y = As<Float4>(halfToFloatBits(t1)); + c.z = As<Float4>(halfToFloatBits(t2)); + c.w = 
As<Float4>(halfToFloatBits(t3)); + transpose4x4(c.x, c.y, c.z, c.w); + break; + case VK_FORMAT_R16G16B16A16_SFLOAT: + t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8)); + t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8)); + t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8)); + t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8)); - c.x = As<Float4>(halfToFloatBits(t0)); - c.y = As<Float4>(halfToFloatBits(t1)); - c.z = As<Float4>(halfToFloatBits(t2)); - c.w = As<Float4>(halfToFloatBits(t3)); - transpose4x4(c.x, c.y, c.z, c.w); - break; - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32_UINT: - case VK_FORMAT_D32_SFLOAT: - // FIXME: Optimal shuffling? - c.x.x = *Pointer<Float>(buffer + index[0] * 4); - c.x.y = *Pointer<Float>(buffer + index[1] * 4); - c.x.z = *Pointer<Float>(buffer + index[2] * 4); - c.x.w = *Pointer<Float>(buffer + index[3] * 4); - break; - case VK_FORMAT_R32G32_SFLOAT: - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32_UINT: - // FIXME: Optimal shuffling? 
- c.x.xy = *Pointer<Float4>(buffer + index[0] * 8); - c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8); - c.z.xy = *Pointer<Float4>(buffer + index[2] * 8); - c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8); - c.y = c.x; - c.x = Float4(c.x.xz, c.z.xz); - c.y = Float4(c.y.yw, c.z.yw); - break; - case VK_FORMAT_R32G32B32_SFLOAT: - case VK_FORMAT_R32G32B32_SINT: - case VK_FORMAT_R32G32B32_UINT: - c.x = *Pointer<Float4>(buffer + index[0] * 16, 16); - c.y = *Pointer<Float4>(buffer + index[1] * 16, 16); - c.z = *Pointer<Float4>(buffer + index[2] * 16, 16); - c.w = *Pointer<Float4>(buffer + index[3] * 16, 16); - transpose4x3(c.x, c.y, c.z, c.w); - break; - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32G32B32A32_UINT: - c.x = *Pointer<Float4>(buffer + index[0] * 16, 16); - c.y = *Pointer<Float4>(buffer + index[1] * 16, 16); - c.z = *Pointer<Float4>(buffer + index[2] * 16, 16); - c.w = *Pointer<Float4>(buffer + index[3] * 16, 16); - transpose4x4(c.x, c.y, c.z, c.w); - break; - case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: - { - Float4 t; // TODO: add Insert(UInt4, RValue<UInt>) - t.x = *Pointer<Float>(buffer + index[0] * 4); - t.y = *Pointer<Float>(buffer + index[1] * 4); - t.z = *Pointer<Float>(buffer + index[2] * 4); - t.w = *Pointer<Float>(buffer + index[3] * 4); - t0 = As<UInt4>(t); - c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24)); - c.x = Float4((t0) & UInt4(0x1FF)) * c.w; - c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w; - c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w; - break; - } - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - { - Float4 t; // TODO: add Insert(UInt4, RValue<UInt>) - t.x = *Pointer<Float>(buffer + index[0] * 4); - t.y = *Pointer<Float>(buffer + index[1] * 4); - t.z = *Pointer<Float>(buffer + index[2] * 4); - t.w = *Pointer<Float>(buffer + index[3] * 4); - t0 = As<UInt4>(t); - c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0))); - c.y = As<Float4>(halfToFloatBits((t0 >> 
7) & UInt4(0x7FF0))); - c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0))); - break; - } - default: - UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat)); + c.x = As<Float4>(halfToFloatBits(t0)); + c.y = As<Float4>(halfToFloatBits(t1)); + c.z = As<Float4>(halfToFloatBits(t2)); + c.w = As<Float4>(halfToFloatBits(t3)); + transpose4x4(c.x, c.y, c.z, c.w); + break; + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32_UINT: + case VK_FORMAT_D32_SFLOAT: + // FIXME: Optimal shuffling? + c.x.x = *Pointer<Float>(buffer + index[0] * 4); + c.x.y = *Pointer<Float>(buffer + index[1] * 4); + c.x.z = *Pointer<Float>(buffer + index[2] * 4); + c.x.w = *Pointer<Float>(buffer + index[3] * 4); + break; + case VK_FORMAT_R32G32_SFLOAT: + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32_UINT: + // FIXME: Optimal shuffling? + c.x.xy = *Pointer<Float4>(buffer + index[0] * 8); + c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8); + c.z.xy = *Pointer<Float4>(buffer + index[2] * 8); + c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8); + c.y = c.x; + c.x = Float4(c.x.xz, c.z.xz); + c.y = Float4(c.y.yw, c.z.yw); + break; + case VK_FORMAT_R32G32B32_SFLOAT: + case VK_FORMAT_R32G32B32_SINT: + case VK_FORMAT_R32G32B32_UINT: + c.x = *Pointer<Float4>(buffer + index[0] * 16, 16); + c.y = *Pointer<Float4>(buffer + index[1] * 16, 16); + c.z = *Pointer<Float4>(buffer + index[2] * 16, 16); + c.w = *Pointer<Float4>(buffer + index[3] * 16, 16); + transpose4x3(c.x, c.y, c.z, c.w); + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32G32B32A32_UINT: + c.x = *Pointer<Float4>(buffer + index[0] * 16, 16); + c.y = *Pointer<Float4>(buffer + index[1] * 16, 16); + c.z = *Pointer<Float4>(buffer + index[2] * 16, 16); + c.w = *Pointer<Float4>(buffer + index[3] * 16, 16); + transpose4x4(c.x, c.y, c.z, c.w); + break; + case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: + { + Float4 t; // TODO: add Insert(UInt4, RValue<UInt>) + t.x = 
*Pointer<Float>(buffer + index[0] * 4); + t.y = *Pointer<Float>(buffer + index[1] * 4); + t.z = *Pointer<Float>(buffer + index[2] * 4); + t.w = *Pointer<Float>(buffer + index[3] * 4); + t0 = As<UInt4>(t); + c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24)); + c.x = Float4((t0)&UInt4(0x1FF)) * c.w; + c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w; + c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w; + break; + } + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + { + Float4 t; // TODO: add Insert(UInt4, RValue<UInt>) + t.x = *Pointer<Float>(buffer + index[0] * 4); + t.y = *Pointer<Float>(buffer + index[1] * 4); + t.z = *Pointer<Float>(buffer + index[2] * 4); + t.w = *Pointer<Float>(buffer + index[3] * 4); + t0 = As<UInt4>(t); + c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0))); + c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0))); + c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0))); + break; + } + default: + UNIMPLEMENTED("Format %d", VkFormat(state.textureFormat)); } } else @@ -1989,15 +2070,15 @@ switch(state.compareOp) { - case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break; - case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break; - case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break; - case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break; - case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break; - case VK_COMPARE_OP_NOT_EQUAL: boolean = CmpNEQ(ref, c.x); break; - case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break; - case VK_COMPARE_OP_NEVER: boolean = Int4(0); break; - default: ASSERT(false); + case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break; + case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break; + case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break; + case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break; + case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break; + case VK_COMPARE_OP_NOT_EQUAL: boolean = 
CmpNEQ(ref, c.x); break; + case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break; + case VK_COMPARE_OP_NEVER: boolean = Int4(0); break; + default: ASSERT(false); } c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f))); @@ -2025,29 +2106,29 @@ switch(state.border) { - case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: - case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: - borderRGB = Int4(0); - borderA = Int4(0); - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: - borderRGB = Int4(0); - borderA = float_one; - break; - case VK_BORDER_COLOR_INT_OPAQUE_BLACK: - borderRGB = Int4(0); - borderA = Int4(1); - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: - borderRGB = float_one; - borderA = float_one; - break; - case VK_BORDER_COLOR_INT_OPAQUE_WHITE: - borderRGB = Int4(1); - borderA = Int4(1); - break; - default: - UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border); + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + borderRGB = Int4(0); + borderA = Int4(0); + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + borderRGB = Int4(0); + borderA = float_one; + break; + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + borderRGB = Int4(0); + borderA = Int4(1); + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + borderRGB = float_one; + borderA = float_one; + break; + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + borderRGB = Int4(1); + borderA = Int4(1); + break; + default: + UNIMPLEMENTED("sint/uint/sfloat border: %u", state.border); } Vector4f out; @@ -2076,7 +2157,7 @@ // TODO: Preferred formula is ceil(lod + 0.5) - 1 ilod = RoundInt(lod); } - else // MIPMAP_LINEAR + else // MIPMAP_LINEAR { ilod = Int(lod); } @@ -2148,7 +2229,7 @@ return As<Short4>(Int2(convert)) + Short4(0x8000u); } - else // Wrap + else // Wrap { return Short4(Int4(uw * Float4(1 << 16))); } @@ -2182,7 +2263,7 @@ { xyz0 = Min(Max(((function.offset != 0) && (addressingMode != ADDRESSING_LAYER)) ? 
As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ); } - else if(addressingMode == ADDRESSING_LAYER) // Note: Offset does not apply to array layers + else if(addressingMode == ADDRESSING_LAYER) // Note: Offset does not apply to array layers { // For cube maps, the layer argument is per cube, each of which has 6 layers if(state.textureType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) @@ -2198,9 +2279,9 @@ } else { - const int halfBits = 0x3EFFFFFF; // Value just under 0.5f - const int oneBits = 0x3F7FFFFF; // Value just under 1.0f - const int twoBits = 0x3FFFFFFF; // Value just under 2.0f + const int halfBits = 0x3EFFFFFF; // Value just under 0.5f + const int oneBits = 0x3F7FFFFF; // Value just under 1.0f + const int twoBits = 0x3FFFFFFF; // Value just under 2.0f bool pointFilter = state.textureFilter == FILTER_POINT || state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR || @@ -2212,17 +2293,17 @@ { switch(addressingMode) { - case ADDRESSING_CLAMP: - coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits))); - break; - case ADDRESSING_BORDER: - // Don't map to a valid range here. - break; - default: - // If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be - // either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER - UNREACHABLE("addressingMode %d", int(addressingMode)); - break; + case ADDRESSING_CLAMP: + coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits))); + break; + case ADDRESSING_BORDER: + // Don't map to a valid range here. 
+ break; + default: + // If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be + // either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER + UNREACHABLE("addressingMode %d", int(addressingMode)); + break; } } else if(state.textureFilter == FILTER_GATHER && addressingMode == ADDRESSING_MIRROR) @@ -2244,8 +2325,8 @@ xyz1 = xyz0 + Int4(1); - xyz0 = (maxXYZ) - mirror(mod(xyz0, Int4(2) * dim) - dim); - xyz1 = (maxXYZ) - mirror(mod(xyz1, Int4(2) * dim) - dim); + xyz0 = (maxXYZ)-mirror(mod(xyz0, Int4(2) * dim) - dim); + xyz1 = (maxXYZ)-mirror(mod(xyz1, Int4(2) * dim) - dim); return; } @@ -2255,17 +2336,17 @@ { switch(addressingMode) { - case ADDRESSING_CLAMP: - case ADDRESSING_SEAMLESS: - // Linear filtering of cube doesn't require clamping because the coordinates - // are already in [0, 1] range and numerical imprecision is tolerated. - if(addressingMode != ADDRESSING_SEAMLESS || pointFilter) - { - Float4 one = As<Float4>(Int4(oneBits)); - coord = Min(Max(coord, Float4(0.0f)), one); - } - break; - case ADDRESSING_MIRROR: + case ADDRESSING_CLAMP: + case ADDRESSING_SEAMLESS: + // Linear filtering of cube doesn't require clamping because the coordinates + // are already in [0, 1] range and numerical imprecision is tolerated. + if(addressingMode != ADDRESSING_SEAMLESS || pointFilter) + { + Float4 one = As<Float4>(Int4(oneBits)); + coord = Min(Max(coord, Float4(0.0f)), one); + } + break; + case ADDRESSING_MIRROR: { Float4 half = As<Float4>(Int4(halfBits)); Float4 one = As<Float4>(Int4(oneBits)); @@ -2273,7 +2354,7 @@ coord = one - Abs(two * Frac(coord * half) - one); } break; - case ADDRESSING_MIRRORONCE: + case ADDRESSING_MIRRORONCE: { Float4 half = As<Float4>(Int4(halfBits)); Float4 one = As<Float4>(Int4(oneBits)); @@ -2281,12 +2362,12 @@ coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one); } break; - case ADDRESSING_BORDER: - // Don't map to a valid range here. 
- break; - default: // Wrap - coord = Frac(coord); - break; + case ADDRESSING_BORDER: + // Don't map to a valid range here. + break; + default: // Wrap + coord = Frac(coord); + break; } } @@ -2331,7 +2412,7 @@ xyz0 += Int4(1); } - xyz1 = xyz0 - filter; // Increment + xyz1 = xyz0 - filter; // Increment if(addressingMode == ADDRESSING_BORDER) { @@ -2345,41 +2426,41 @@ { switch(addressingMode) { - case ADDRESSING_SEAMLESS: - UNREACHABLE("addressingMode %d", int(addressingMode)); // Cube sampling doesn't support offset. - case ADDRESSING_MIRROR: - case ADDRESSING_MIRRORONCE: - // TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE. - // Fall through to Clamp. - case ADDRESSING_CLAMP: - xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ); - xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ); - break; - default: // Wrap - xyz0 = mod(xyz0, dim); - xyz1 = mod(xyz1, dim); - break; + case ADDRESSING_SEAMLESS: + UNREACHABLE("addressingMode %d", int(addressingMode)); // Cube sampling doesn't support offset. + case ADDRESSING_MIRROR: + case ADDRESSING_MIRRORONCE: + // TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE. + // Fall through to Clamp. + case ADDRESSING_CLAMP: + xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ); + xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ); + break; + default: // Wrap + xyz0 = mod(xyz0, dim); + xyz1 = mod(xyz1, dim); + break; } } else if(state.textureFilter != FILTER_POINT) { switch(addressingMode) { - case ADDRESSING_SEAMLESS: - break; - case ADDRESSING_MIRROR: - case ADDRESSING_MIRRORONCE: - case ADDRESSING_CLAMP: - xyz0 = Max(xyz0, Int4(0)); - xyz1 = Min(xyz1, maxXYZ); - break; - default: // Wrap + case ADDRESSING_SEAMLESS: + break; + case ADDRESSING_MIRROR: + case ADDRESSING_MIRRORONCE: + case ADDRESSING_CLAMP: + xyz0 = Max(xyz0, Int4(0)); + xyz1 = Min(xyz1, maxXYZ); + break; + default: // Wrap { Int4 under = CmpLT(xyz0, Int4(0)); - xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? 
dim - 1 : xyz // TODO: IfThenElse() + xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // TODO: IfThenElse() Int4 nover = CmpLT(xyz1, dim); - xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz + xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz } break; } @@ -2401,7 +2482,7 @@ { c = As<UShort4>(c) >> 8; - Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16)); + Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants, sRGBtoLinear8_16)); c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); @@ -2483,13 +2564,13 @@ { switch(state.gatherComponent) { - case 0: return state.swizzle.r; - case 1: return state.swizzle.g; - case 2: return state.swizzle.b; - case 3: return state.swizzle.a; - default: - UNREACHABLE("Invalid component"); - return VK_COMPONENT_SWIZZLE_R; + case 0: return state.swizzle.r; + case 1: return state.swizzle.g; + case 2: return state.swizzle.b; + case 3: return state.swizzle.a; + default: + UNREACHABLE("Invalid component"); + return VK_COMPONENT_SWIZZLE_R; } }
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp index 396a9f6..b1e925f 100644 --- a/src/Pipeline/SamplerCore.hpp +++ b/src/Pipeline/SamplerCore.hpp
@@ -21,7 +21,7 @@ #include "Reactor/Reactor.hpp" #ifdef None -#undef None // b/127920555 +# undef None // b/127920555 #endif namespace sw { @@ -45,7 +45,9 @@ struct SamplerFunction { SamplerFunction(SamplerMethod method, bool offset = false, bool sample = false) - : method(method), offset(offset), sample(sample) + : method(method) + , offset(offset) + , sample(sample) {} operator SamplerMethod() { return method; } @@ -60,20 +62,20 @@ public: SamplerCore(Pointer<Byte> &constants, const Sampler &state); - Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4& sampleId, SamplerFunction function); + Vector4f sampleTexture(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float4 uvw[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4f &offset, Int4 &sampleId, SamplerFunction function); private: Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod); - Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function); - Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function); - Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function); - Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function); - Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, 
Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function); - Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function); - Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function); - Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function); - Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function); - Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4& cubeArrayCoord, const Int4& sampleId, Float &lod, bool secondLOD, SamplerFunction function); + Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function); + Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function); + Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, 
SamplerFunction function); + Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function); + Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function); + Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function); + Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function); + Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function); + Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function); + Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, const Float4 &cubeArrayCoord, const Int4 &sampleId, Float &lod, bool secondLOD, SamplerFunction function); Float log2sqrt(Float lod); Float log2(Float lod); void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float4 &dsx, Float4 &dsy, SamplerFunction function); @@ -81,15 +83,15 @@ void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 
&dsx, Float4 &dsy, SamplerFunction function); Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M); Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode); - void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, SamplerFunction function); - void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, SamplerFunction function); - Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, const Short4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function); + void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function); + void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, SamplerFunction function); + Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, const Short4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function); Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer); - Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, const Int4& cubeArrayId, const Int4& sampleId, Pointer<Byte> buffer, SamplerFunction function); + Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, const Int4 &cubeArrayId, const Int4 &sampleId, Pointer<Byte> buffer, SamplerFunction function); Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid); void selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD); - Short4 
address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap); - void address(const Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function); + Short4 address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap); + void address(const Float4 &uw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function); Int4 computeFilterOffset(Float &lod); void convertSigned15(Float4 &cf, Short4 &ci); @@ -120,21 +122,22 @@ #ifdef ENABLE_RR_PRINT namespace rr { -template <> struct PrintValue::Ty<sw::SamplerFunction> +template<> +struct PrintValue::Ty<sw::SamplerFunction> { - static std::string fmt(const sw::SamplerFunction& v) + static std::string fmt(const sw::SamplerFunction &v) { return std::string("SamplerFunction[") + - "method: " + std::to_string(v.method) + - ", offset: " + std::to_string(v.offset) + - ", sample: " + std::to_string(v.sample) + - "]"; + "method: " + std::to_string(v.method) + + ", offset: " + std::to_string(v.offset) + + ", sample: " + std::to_string(v.sample) + + "]"; } - static std::vector<rr::Value*> val(const sw::SamplerFunction& v) { return {}; } + static std::vector<rr::Value *> val(const sw::SamplerFunction &v) { return {}; } }; } // namespace rr -#endif // ENABLE_RR_PRINT +#endif // ENABLE_RR_PRINT -#endif // sw_SamplerCore_hpp +#endif // sw_SamplerCore_hpp
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp index d3307df..06dea29 100644 --- a/src/Pipeline/SetupRoutine.cpp +++ b/src/Pipeline/SetupRoutine.cpp
@@ -12,18 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <Device/Vertex.hpp> #include "SetupRoutine.hpp" +#include <Device/Vertex.hpp> #include "Constants.hpp" -#include "Device/Primitive.hpp" #include "Device/Polygon.hpp" +#include "Device/Primitive.hpp" #include "Device/Renderer.hpp" #include "Reactor/Reactor.hpp" namespace sw { -SetupRoutine::SetupRoutine(const SetupProcessor::State &state) : state(state) +SetupRoutine::SetupRoutine(const SetupProcessor::State &state) + : state(state) { } @@ -40,15 +41,15 @@ Pointer<Byte> polygon(function.Arg<2>()); Pointer<Byte> data(function.Arg<3>()); - Pointer<Byte> constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants)); + Pointer<Byte> constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData, constants)); const bool point = state.isDrawPoint; const bool line = state.isDrawLine; const bool triangle = state.isDrawTriangle; - const int V0 = OFFSET(Triangle,v0); - const int V1 = (triangle || line) ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0); - const int V2 = triangle ? OFFSET(Triangle,v2) : (line ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0)); + const int V0 = OFFSET(Triangle, v0); + const int V1 = (triangle || line) ? OFFSET(Triangle, v1) : OFFSET(Triangle, v0); + const int V2 = triangle ? OFFSET(Triangle, v2) : (line ? 
OFFSET(Triangle, v1) : OFFSET(Triangle, v0)); Pointer<Byte> v0 = tri + V0; Pointer<Byte> v1 = tri + V1; @@ -57,15 +58,15 @@ Array<Int> X(16); Array<Int> Y(16); - X[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x)); - X[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x)); - X[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x)); + X[0] = *Pointer<Int>(v0 + OFFSET(Vertex, projected.x)); + X[1] = *Pointer<Int>(v1 + OFFSET(Vertex, projected.x)); + X[2] = *Pointer<Int>(v2 + OFFSET(Vertex, projected.x)); - Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y)); - Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y)); - Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y)); + Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex, projected.y)); + Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex, projected.y)); + Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex, projected.y)); - Int d = 1; // Winding direction + Int d = 1; // Winding direction // Culling if(triangle) @@ -78,7 +79,7 @@ Float y1 = Float(Y[1]); Float y2 = Float(Y[2]); - Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2; // Area + Float A = (y0 - y2) * x1 + (y2 - y1) * x0 + (y1 - y0) * x2; // Area If(A == 0.0f) { @@ -106,40 +107,40 @@ If(frontFacing) { - *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); } Else { - *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + *Pointer<Byte8>(primitive + OFFSET(Primitive, 
clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); } } else { - *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); } - Int n = *Pointer<Int>(polygon + OFFSET(Polygon,n)); - Int m = *Pointer<Int>(polygon + OFFSET(Polygon,i)); + Int n = *Pointer<Int>(polygon + OFFSET(Polygon, n)); + Int m = *Pointer<Int>(polygon + OFFSET(Polygon, i)); - If(m != 0 || Bool(!triangle)) // Clipped triangle; reproject + If(m != 0 || Bool(!triangle)) // Clipped triangle; reproject { - Pointer<Byte> V = polygon + OFFSET(Polygon,P) + m * sizeof(void*) * 16; + Pointer<Byte> V = polygon + OFFSET(Polygon, P) + m * sizeof(void *) * 16; Int i = 0; Do { - Pointer<Float4> p = *Pointer<Pointer<Float4> >(V + i * sizeof(void*)); + Pointer<Float4> p = *Pointer<Pointer<Float4> >(V + i * sizeof(void *)); Float4 v = *Pointer<Float4>(p, 16); Float w = v.w; Float rhw = IfThenElse(w != 0.0f, 1.0f / w, Float(1.0f)); - X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,WxF))); - Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,HxF))); + X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData, X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData, WxF))); + Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData, Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData, HxF))); i++; } @@ -176,8 +177,8 @@ 
yMax = (yMax + subPixM) >> subPixB; } - yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0))); - yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData,scissorY1))); + yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData, scissorY0))); + yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData, scissorY1))); // If yMin and yMax are initially negative, the scissor clamping above will typically result // in yMin == 0 and yMax unchanged. We bail as we don't need to rasterize this primitive, and @@ -201,16 +202,16 @@ if(state.multiSample > 1) { - Xq[i] = Xq[i] + *Pointer<Int>(constants + OFFSET(Constants,Xf) + q * sizeof(int)); - Yq[i] = Yq[i] + *Pointer<Int>(constants + OFFSET(Constants,Yf) + q * sizeof(int)); + Xq[i] = Xq[i] + *Pointer<Int>(constants + OFFSET(Constants, Xf) + q * sizeof(int)); + Yq[i] = Yq[i] + *Pointer<Int>(constants + OFFSET(Constants, Yf) + q * sizeof(int)); } i++; } Until(i >= n); - Pointer<Byte> leftEdge = Pointer<Byte>(primitive + OFFSET(Primitive,outline->left)) + q * sizeof(Primitive); - Pointer<Byte> rightEdge = Pointer<Byte>(primitive + OFFSET(Primitive,outline->right)) + q * sizeof(Primitive); + Pointer<Byte> leftEdge = Pointer<Byte>(primitive + OFFSET(Primitive, outline->left)) + q * sizeof(Primitive); + Pointer<Byte> rightEdge = Pointer<Byte>(primitive + OFFSET(Primitive, outline->right)) + q * sizeof(Primitive); if(state.multiSample > 1) { @@ -265,8 +266,8 @@ } } - *Pointer<Int>(primitive + OFFSET(Primitive,yMin)) = yMin; - *Pointer<Int>(primitive + OFFSET(Primitive,yMax)) = yMax; + *Pointer<Int>(primitive + OFFSET(Primitive, yMin)) = yMin; + *Pointer<Int>(primitive + OFFSET(Primitive, yMax)) = yMax; // Sort by minimum y if(triangle) @@ -305,15 +306,15 @@ w012.z = w2; w012.w = 1; - Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.w)); + Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex, projected.w)); - Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x)); - Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x)); - 
Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x)); + Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex, projected.x)); + Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex, projected.x)); + Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex, projected.x)); - Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y)); - Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y)); - Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y)); + Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex, projected.y)); + Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex, projected.y)); + Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex, projected.y)); if(point) { @@ -347,8 +348,8 @@ Float4 xQuad = Float4(0, 1, 0, 1) - Float4(dx); Float4 yQuad = Float4(0, 0, 1, 1) - Float4(dy); - *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16) = xQuad; - *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16) = yQuad; + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16) = xQuad; + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16) = yQuad; Float4 M[3]; @@ -365,18 +366,18 @@ M[0].x = (y1 * w2 - y2 * w1) * D; M[0].y = (x2 * w1 - x1 * w2) * D; - // M[0].z = rhw0; - // M[0].w = 0; + // M[0].z = rhw0; + // M[0].w = 0; M[1].x = y2 * A; M[1].y = -x2 * A; - // M[1].z = 0; - // M[1].w = 0; + // M[1].z = 0; + // M[1].w = 0; M[2].x = -y1 * A; M[2].y = x1 * A; - // M[2].z = 0; - // M[2].w = 0; + // M[2].z = 0; + // M[2].w = 0; } if(state.interpolateW) @@ -387,16 +388,16 @@ Float4 B = ABC.y; Float4 C = ABC.z; - *Pointer<Float4>(primitive + OFFSET(Primitive,w.A), 16) = A; - *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16) = B; - *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) = C; + *Pointer<Float4>(primitive + OFFSET(Primitive, w.A), 16) = A; + *Pointer<Float4>(primitive + OFFSET(Primitive, w.B), 16) = B; + *Pointer<Float4>(primitive + OFFSET(Primitive, w.C), 16) = C; } if(state.interpolateZ) { - Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.z)); - Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex,projected.z)); 
- Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex,projected.z)); + Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex, projected.z)); + Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex, projected.z)); + Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex, projected.z)); z1 -= z0; z2 -= z0; @@ -412,7 +413,7 @@ Float x2 = Float(X2) * (1.0f / subPixF); Float y2 = Float(Y2) * (1.0f / subPixF); - Float D = *Pointer<Float>(data + OFFSET(DrawData,depthRange)) / (x1 * y2 - x2 * y1); + Float D = *Pointer<Float>(data + OFFSET(DrawData, depthRange)) / (x1 * y2 - x2 * y1); Float a = (y2 * z1 - y1 * z2) * D; Float b = (x1 * z2 - x2 * z1) * D; @@ -426,22 +427,22 @@ B = Float4(0, 0, 0, 0); } - *Pointer<Float4>(primitive + OFFSET(Primitive,z.A), 16) = A; - *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16) = B; + *Pointer<Float4>(primitive + OFFSET(Primitive, z.A), 16) = A; + *Pointer<Float4>(primitive + OFFSET(Primitive, z.B), 16) = B; Float c = z0; if(state.applySlopeDepthBias) { Float bias = Max(Abs(Float(A.x)), Abs(Float(B.x))); - bias *= *Pointer<Float>(data + OFFSET(DrawData,slopeDepthBias)); + bias *= *Pointer<Float>(data + OFFSET(DrawData, slopeDepthBias)); c += bias; } - C = Float4(c * *Pointer<Float>(data + OFFSET(DrawData,depthRange)) + *Pointer<Float>(data + OFFSET(DrawData,depthNear))); + C = Float4(c * *Pointer<Float>(data + OFFSET(DrawData, depthRange)) + *Pointer<Float>(data + OFFSET(DrawData, depthNear))); - *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) = C; + *Pointer<Float4>(primitive + OFFSET(Primitive, z.C), 16) = C; } for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++) @@ -449,27 +450,27 @@ if(state.gradient[interpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED) { setupGradient(primitive, tri, w012, M, v0, v1, v2, - OFFSET(Vertex, v[interpolant]), - OFFSET(Primitive, V[interpolant]), - state.gradient[interpolant].Flat, - !state.gradient[interpolant].NoPerspective); + OFFSET(Vertex, v[interpolant]), + OFFSET(Primitive, V[interpolant]), 
+ state.gradient[interpolant].Flat, + !state.gradient[interpolant].NoPerspective); } } for(unsigned int i = 0; i < state.numClipDistances; i++) { setupGradient(primitive, tri, w012, M, v0, v1, v2, - OFFSET(Vertex, clipDistance[i]), - OFFSET(Primitive, clipDistance[i]), - false, true); + OFFSET(Vertex, clipDistance[i]), + OFFSET(Primitive, clipDistance[i]), + false, true); } for(unsigned int i = 0; i < state.numCullDistances; i++) { setupGradient(primitive, tri, w012, M, v0, v1, v2, - OFFSET(Vertex, cullDistance[i]), - OFFSET(Primitive, cullDistance[i]), - false, true); + OFFSET(Vertex, cullDistance[i]), + OFFSET(Primitive, cullDistance[i]), + false, true); } Return(1); @@ -509,7 +510,7 @@ } else { - int leadingVertex = OFFSET(Triangle,v0); + int leadingVertex = OFFSET(Triangle, v0); Float C = *Pointer<Float>(triangle + leadingVertex + attribute); *Pointer<Float4>(primitive + planeEquation + 0, 16) = Float4(0, 0, 0, 0); @@ -532,16 +533,16 @@ constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS; constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK; - Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY0))); - Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY1))); + Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData, scissorY0))); + Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData, scissorY1))); If(y1 < y2) { - Int xMin = *Pointer<Int>(data + OFFSET(DrawData,scissorX0)); - Int xMax = *Pointer<Int>(data + OFFSET(DrawData,scissorX1)); + Int xMin = *Pointer<Int>(data + OFFSET(DrawData, scissorX0)); + Int xMax = *Pointer<Int>(data + OFFSET(DrawData, scissorX1)); - Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left); - Pointer<Byte> rightEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right); + Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->left); + 
Pointer<Byte> rightEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->right); Pointer<Byte> edge = IfThenElse(swap, rightEdge, leftEdge); // Deltas @@ -552,19 +553,19 @@ Int FDY12 = DY12 << subPixB; Int X = DX12 * ((y1 << subPixB) - Y1) + (X1 & subPixM) * DY12; - Int x = (X1 >> subPixB) + X / FDY12; // Edge - Int d = X % FDY12; // Error-term - Int ceil = -d >> 31; // Ceiling division: remainder <= 0 + Int x = (X1 >> subPixB) + X / FDY12; // Edge + Int d = X % FDY12; // Error-term + Int ceil = -d >> 31; // Ceiling division: remainder <= 0 x -= ceil; d -= ceil & FDY12; - Int Q = FDX12 / FDY12; // Edge-step - Int R = FDX12 % FDY12; // Error-step - Int floor = R >> 31; // Flooring division: remainder >= 0 + Int Q = FDX12 / FDY12; // Edge-step + Int R = FDX12 % FDY12; // Error-step + Int floor = R >> 31; // Flooring division: remainder >= 0 Q += floor; R += floor & FDY12; - Int D = FDY12; // Error-overflow + Int D = FDY12; // Error-overflow Int y = y1; Do @@ -588,7 +589,7 @@ void SetupRoutine::conditionalRotate1(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2) { - #if 0 // Rely on LLVM optimization +#if 0 // Rely on LLVM optimization If(condition) { Pointer<Byte> vX; @@ -598,17 +599,17 @@ v1 = v2; v2 = vX; } - #else - Pointer<Byte> vX = v0; - v0 = IfThenElse(condition, v1, v0); - v1 = IfThenElse(condition, v2, v1); - v2 = IfThenElse(condition, vX, v2); - #endif +#else + Pointer<Byte> vX = v0; + v0 = IfThenElse(condition, v1, v0); + v1 = IfThenElse(condition, v2, v1); + v2 = IfThenElse(condition, vX, v2); +#endif } void SetupRoutine::conditionalRotate2(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2) { - #if 0 // Rely on LLVM optimization +#if 0 // Rely on LLVM optimization If(condition) { Pointer<Byte> vX; @@ -618,12 +619,12 @@ v1 = v0; v0 = vX; } - #else - Pointer<Byte> vX = v2; - v2 = IfThenElse(condition, v1, v2); - v1 = IfThenElse(condition, v0, v1); - v0 = IfThenElse(condition, vX, v0); - #endif 
+#else + Pointer<Byte> vX = v2; + v2 = IfThenElse(condition, v1, v2); + v1 = IfThenElse(condition, v0, v1); + v0 = IfThenElse(condition, vX, v0); +#endif } SetupFunction::RoutineType SetupRoutine::getRoutine()
diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp index 59fe55a..b8b399b 100644 --- a/src/Pipeline/SetupRoutine.hpp +++ b/src/Pipeline/SetupRoutine.hpp
@@ -45,4 +45,4 @@ } // namespace sw -#endif // sw_SetupRoutine_hpp +#endif // sw_SetupRoutine_hpp
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp index 65b2084..534ed8d 100644 --- a/src/Pipeline/ShaderCore.cpp +++ b/src/Pipeline/ShaderCore.cpp
@@ -55,10 +55,10 @@ { switch(i) { - case 0: return x; - case 1: return y; - case 2: return z; - case 3: return w; + case 0: return x; + case 1: return y; + case 2: return z; + case 3: return w; } return x; @@ -98,10 +98,10 @@ { switch(i) { - case 0: return x; - case 1: return y; - case 2: return z; - case 3: return w; + case 0: return x; + case 1: return y; + case 2: return z; + case 3: return w; } return x; @@ -116,20 +116,20 @@ // the IEEE-754 floating-point number. Clamp to prevent overflow // past the representation of infinity. Float4 x0 = x; - x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f - x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f + x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f + x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f Int4 i = RoundInt(x0 - Float4(0.5f)); - Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent. + Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent. // For the fractional part use a polynomial // which approximates 2^f in the 0 to 1 range. 
Float4 f = x0 - Float4(i); - Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f - ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f - ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f - ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f - ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f + Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f + ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f + ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f + ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f + ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f ff = ff * f + Float4(1.0f); return ii * ff; @@ -147,7 +147,7 @@ x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000)); x1 = As<Float4>(As<UInt4>(x1) >> 8); x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f))); - x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f; + x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f; x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f); @@ -163,13 +163,13 @@ Float4 exponential(RValue<Float4> x, bool pp) { // FIXME: Propagate the constant - return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2) + return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2) } Float4 logarithm(RValue<Float4> x, bool pp) { // FIXME: Propagate the constant - return Float4(6.93147181e-1f) * logarithm2(x, pp); // ln(2) + return Float4(6.93147181e-1f) * logarithm2(x, pp); // ln(2) } Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp) @@ -191,7 +191,7 @@ if(finite) { int big = 0x7F7FFFFF; - rcp = Min(rcp, Float4((float&)big)); + rcp = Min(rcp, Float4((float &)big)); } return rcp; @@ -234,8 +234,8 @@ Float4 sine_pi(RValue<Float4> x, bool pp) { - const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2 - const Float4 B = 
Float4(1.27323954e+0f); // 4/pi + const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2 + const Float4 B = Float4(1.27323954e+0f); // 4/pi const Float4 C = Float4(7.75160950e-1f); const Float4 D = Float4(2.24839049e-1f); @@ -265,7 +265,7 @@ Float4 sine(RValue<Float4> x, bool pp) { // Reduce to [-0.5, 0.5] range - Float4 y = x * Float4(1.59154943e-1f); // 1/2pi + Float4 y = x * Float4(1.59154943e-1f); // 1/2pi y = y - Round(y); if(!pp) @@ -320,10 +320,10 @@ Float4 arcsin(RValue<Float4> x, bool pp) { - if(false) // Simpler implementation fails even lowp precision tests + if(false) // Simpler implementation fails even lowp precision tests { // x*(pi/2-sqrt(1-x*x)*pi/5) - return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f)); + return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x * x) * Float4(6.28318531e-1f)); } else { @@ -335,7 +335,7 @@ const Float4 a3(-0.0187293f); Float4 absx = Abs(x); return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^ - (As<Int4>(x) & Int4(0x80000000))); + (As<Int4>(x) & Int4(0x80000000))); } } @@ -366,20 +366,20 @@ { Float4 absx = Abs(x); Int4 O = CmpNLT(absx, Float4(1.0f)); - Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select + Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select const Float4 half_pi(1.57079632f); Float4 theta = arctan_01(y, pp); - return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^ // FIXME: Vector select - (As<Int4>(x) & Int4(0x80000000))); + return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^ // FIXME: Vector select + (As<Int4>(x) & Int4(0x80000000))); } Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp) { - const Float4 pi(3.14159265f); // pi - const Float4 minus_pi(-3.14159265f); // -pi - const Float4 half_pi(1.57079632f); // pi/2 - const Float4 
quarter_pi(7.85398163e-1f); // pi/4 + const Float4 pi(3.14159265f); // pi + const Float4 minus_pi(-3.14159265f); // -pi + const Float4 half_pi(1.57079632f); // pi/2 + const Float4 quarter_pi(7.85398163e-1f); // pi/4 // Rotate to upper semicircle when in lower semicircle Int4 S = CmpLT(y, Float4(0.0f)); @@ -390,24 +390,24 @@ // Rotate to right quadrant when in left quadrant Int4 Q = CmpLT(x0, Float4(0.0f)); theta += As<Float4>(Q & As<Int4>(half_pi)); - Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select - Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select + Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select + Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select // Mirror to first octant when in second octant Int4 O = CmpNLT(y1, x1); - Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1))); // FIXME: Vector select - Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select + Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1))); // FIXME: Vector select + Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select // Approximation of atan in [0..1] Int4 zero_x = CmpEQ(x2, Float4(0.0f)); - Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4 + Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4 Float4 atan2_theta = arctan_01(y2 / x2, pp); - theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select + theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select (inf_y & As<Int4>(quarter_pi))); // Recover loss of precision for tiny theta angles - Int4 precision_loss = S & Q & O & ~inf_y; 
// This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta - return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta))); // FIXME: Vector select + Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta + return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta))); // FIXME: Vector select } Float4 sineh(RValue<Float4> x, bool pp) @@ -539,10 +539,10 @@ { switch(N) { - case 1: transpose4x1(row0, row1, row2, row3); break; - case 2: transpose4x2(row0, row1, row2, row3); break; - case 3: transpose4x3(row0, row1, row2, row3); break; - case 4: transpose4x4(row0, row1, row2, row3); break; + case 1: transpose4x1(row0, row1, row2, row3); break; + case 2: transpose4x2(row0, row1, row2, row3); break; + case 3: transpose4x3(row0, row1, row2, row3); break; + case 4: transpose4x4(row0, row1, row2, row3); break; } } @@ -551,15 +551,15 @@ auto magic = SIMD::UInt(126 << 23); auto sign16 = halfBits & SIMD::UInt(0x8000); - auto man16 = halfBits & SIMD::UInt(0x03FF); - auto exp16 = halfBits & SIMD::UInt(0x7C00); + auto man16 = halfBits & SIMD::UInt(0x03FF); + auto exp16 = halfBits & SIMD::UInt(0x7C00); auto isDnormOrZero = CmpEQ(exp16, SIMD::UInt(0)); auto isInfOrNaN = CmpEQ(exp16, SIMD::UInt(0x7C00)); auto sign32 = sign16 << 16; - auto man32 = man16 << 13; - auto exp32 = (exp16 + SIMD::UInt(0x1C000)) << 13; + auto man32 = man16 << 13; + auto exp32 = (exp16 + SIMD::UInt(0x1C000)) << 13; auto norm32 = (man32 | exp32) | (isInfOrNaN & SIMD::UInt(0x7F800000)); auto denorm32 = As<SIMD::UInt>(As<SIMD::Float>(magic + man16) - As<SIMD::Float>(magic)); @@ -584,7 +584,10 @@ // Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf // instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 
Floating-Point Computation) SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)), - As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) | + As<SIMD::Float>(SIMD::UInt(c_clamp))))) - + SIMD::UInt(mask_round)) >> + 13) & + b_isnormal) | ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) | SIMD::UInt(c_infty_as_fp16))); @@ -610,7 +613,7 @@ { SIMD::UInt halfBits = floatToHalfBits(As<SIMD::UInt>(value), true) & SIMD::UInt(0x7FF00000, 0x7FF00000, 0x7FE00000, 0); - return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1); + return (UInt(halfBits.x) >> 20) | (UInt(halfBits.y) >> 9) | (UInt(halfBits.z) << 1); } rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints) @@ -670,9 +673,9 @@ // Performs a fused-multiply add, returning a * b + c. rr::RValue<sw::SIMD::Float> FMA( - rr::RValue<sw::SIMD::Float> const &a, - rr::RValue<sw::SIMD::Float> const &b, - rr::RValue<sw::SIMD::Float> const &c) + rr::RValue<sw::SIMD::Float> const &a, + rr::RValue<sw::SIMD::Float> const &b, + rr::RValue<sw::SIMD::Float> const &c) { return a * b + c; } @@ -694,12 +697,12 @@ auto xIsNan = IsNan(x); auto yIsNan = IsNan(y); return As<sw::SIMD::Float>( - // If neither are NaN, return min - ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) | - // If one operand is a NaN, the other operand is the result - // If both operands are NaN, the result is a NaN. - ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) | - (( xIsNan ) & As<sw::SIMD::Int>(y))); + // If neither are NaN, return min + ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) | + // If one operand is a NaN, the other operand is the result + // If both operands are NaN, the result is a NaN. + ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) | + ((xIsNan)&As<sw::SIMD::Int>(y))); } // Returns y if y > x; otherwise result is x. 
@@ -711,37 +714,37 @@ auto xIsNan = IsNan(x); auto yIsNan = IsNan(y); return As<sw::SIMD::Float>( - // If neither are NaN, return max - ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) | - // If one operand is a NaN, the other operand is the result - // If both operands are NaN, the result is a NaN. - ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) | - (( xIsNan ) & As<sw::SIMD::Int>(y))); + // If neither are NaN, return max + ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) | + // If one operand is a NaN, the other operand is the result + // If both operands are NaN, the result is a NaN. + ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) | + ((xIsNan)&As<sw::SIMD::Int>(y))); } // Returns the determinant of a 2x2 matrix. rr::RValue<sw::SIMD::Float> Determinant( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, - rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d) + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, + rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d) { - return a*d - b*c; + return a * d - b * c; } // Returns the determinant of a 3x3 matrix. 
rr::RValue<sw::SIMD::Float> Determinant( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, - rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, - rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i) + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, + rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, + rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i) { - return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h; + return a * e * i + b * f * g + c * d * h - c * e * g - b * d * i - a * f * h; } // Returns the determinant of a 4x4 matrix. rr::RValue<sw::SIMD::Float> Determinant( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, - rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, - rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, - rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p) + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, + rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, + rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const 
&k, rr::RValue<sw::SIMD::Float> const &l, + rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p) { return a * Determinant(f, g, h, j, k, l, @@ -759,108 +762,130 @@ // Returns the inverse of a 2x2 matrix. std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, - rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d) + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, + rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d) { auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d); - return {{s*d, -s*b, -s*c, s*a}}; + return { { s * d, -s * b, -s * c, s * a } }; } // Returns the inverse of a 3x3 matrix. std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, - rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, - rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i) + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, + rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, + rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i) { auto s = sw::SIMD::Float(1.0f) / Determinant( - a, b, c, - d, e, f, - g, h, i); // TODO: duplicate arithmetic calculating the det and below. + a, b, c, + d, e, f, + g, h, i); // TODO: duplicate arithmetic calculating the det and below. 
- return {{ - s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e), - s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f), - s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d), - }}; + return { { + s * (e * i - f * h), + s * (c * h - b * i), + s * (b * f - c * e), + s * (f * g - d * i), + s * (a * i - c * g), + s * (c * d - a * f), + s * (d * h - e * g), + s * (b * g - a * h), + s * (a * e - b * d), + } }; } // Returns the inverse of a 4x4 matrix. std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, - rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, - rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, - rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p) + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, + rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, + rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, + rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p) { auto s = sw::SIMD::Float(1.0f) / Determinant( - a, b, c, d, - e, f, g, h, - i, j, k, l, - m, n, o, p); // TODO: duplicate arithmetic calculating the det and below. 
+ a, b, c, d, + e, f, g, h, + i, j, k, l, + m, n, o, p); // TODO: duplicate arithmetic calculating the det and below. - auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n; - auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n; - auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j; - auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m; - auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i; - auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i; + auto kplo = k * p - l * o, jpln = j * p - l * n, jokn = j * o - k * n; + auto gpho = g * p - h * o, fphn = f * p - h * n, fogn = f * o - g * n; + auto glhk = g * l - h * k, flhj = f * l - h * j, fkgj = f * k - g * j; + auto iplm = i * p - l * m, iokm = i * o - k * m, ephm = e * p - h * m; + auto eogm = e * o - g * m, elhi = e * l - h * i, ekgi = e * k - g * i; + auto injm = i * n - j * m, enfm = e * n - f * m, ejfi = e * j - f * i; - return {{ - s * ( f * kplo - g * jpln + h * jokn), + return { { + s * (f * kplo - g * jpln + h * jokn), s * (-b * kplo + c * jpln - d * jokn), - s * ( b * gpho - c * fphn + d * fogn), + s * (b * gpho - c * fphn + d * fogn), s * (-b * glhk + c * flhj - d * fkgj), s * (-e * kplo + g * iplm - h * iokm), - s * ( a * kplo - c * iplm + d * iokm), + s * (a * kplo - c * iplm + d * iokm), s * (-a * gpho + c * ephm - d * eogm), - s * ( a * glhk - c * elhi + d * ekgi), + s * (a * glhk - c * elhi + d * ekgi), - s * ( e * jpln - f * iplm + h * injm), + s * (e * jpln - f * iplm + h * injm), s * (-a * jpln + b * iplm - d * injm), - s * ( a * fphn - b * ephm + d * enfm), + s * (a * fphn - b * ephm + d * enfm), s * (-a * flhj + b * elhi - d * ejfi), s * (-e * jokn + f * iokm - g * injm), - s * ( a * jokn - b * iokm + c * injm), + s * (a * jokn - b * iokm + c * injm), s * (-a * fogn + b * eogm - c * enfm), - s * ( a * fkgj - b * ekgi + c * ejfi), - }}; + s * (a * fkgj - b * ekgi + c * ejfi), + } }; } namespace SIMD { Pointer::Pointer(rr::Pointer<Byte> base, rr::Int limit) - : 
base(base), - dynamicLimit(limit), staticLimit(0), - dynamicOffsets(0), staticOffsets{}, - hasDynamicLimit(true), hasDynamicOffsets(false) {} + : base(base) + , dynamicLimit(limit) + , staticLimit(0) + , dynamicOffsets(0) + , staticOffsets{} + , hasDynamicLimit(true) + , hasDynamicOffsets(false) +{} Pointer::Pointer(rr::Pointer<Byte> base, unsigned int limit) - : base(base), - dynamicLimit(0), staticLimit(limit), - dynamicOffsets(0), staticOffsets{}, - hasDynamicLimit(false), hasDynamicOffsets(false) {} + : base(base) + , dynamicLimit(0) + , staticLimit(limit) + , dynamicOffsets(0) + , staticOffsets{} + , hasDynamicLimit(false) + , hasDynamicOffsets(false) +{} Pointer::Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset) - : base(base), - dynamicLimit(limit), staticLimit(0), - dynamicOffsets(offset), staticOffsets{}, - hasDynamicLimit(true), hasDynamicOffsets(true) {} + : base(base) + , dynamicLimit(limit) + , staticLimit(0) + , dynamicOffsets(offset) + , staticOffsets{} + , hasDynamicLimit(true) + , hasDynamicOffsets(true) +{} Pointer::Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset) - : base(base), - dynamicLimit(0), staticLimit(limit), - dynamicOffsets(offset), staticOffsets{}, - hasDynamicLimit(false), hasDynamicOffsets(true) {} + : base(base) + , dynamicLimit(0) + , staticLimit(limit) + , dynamicOffsets(offset) + , staticOffsets{} + , hasDynamicLimit(false) + , hasDynamicOffsets(true) +{} -Pointer& Pointer::operator += (Int i) +Pointer &Pointer::operator+=(Int i) { dynamicOffsets += i; hasDynamicOffsets = true; return *this; } -Pointer& Pointer::operator *= (Int i) +Pointer &Pointer::operator*=(Int i) { dynamicOffsets = offsets() * i; staticOffsets = {}; @@ -868,16 +893,26 @@ return *this; } -Pointer Pointer::operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; } -Pointer Pointer::operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; } +Pointer Pointer::operator+(SIMD::Int i) +{ + Pointer p = *this; + p += 
i; + return p; +} +Pointer Pointer::operator*(SIMD::Int i) +{ + Pointer p = *this; + p *= i; + return p; +} -Pointer& Pointer::operator += (int i) +Pointer &Pointer::operator+=(int i) { for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; } return *this; } -Pointer& Pointer::operator *= (int i) +Pointer &Pointer::operator*=(int i) { for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; } if(hasDynamicOffsets) @@ -887,8 +922,18 @@ return *this; } -Pointer Pointer::operator + (int i) { Pointer p = *this; p += i; return p; } -Pointer Pointer::operator * (int i) { Pointer p = *this; p *= i; return p; } +Pointer Pointer::operator+(int i) +{ + Pointer p = *this; + p += i; + return p; +} +Pointer Pointer::operator*(int i) +{ + Pointer p = *this; + p *= i; + return p; +} SIMD::Int Pointer::offsets() const { @@ -910,10 +955,10 @@ // Common fast paths. static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4"); return SIMD::Int( - (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, - (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, - (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, - (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0); + (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, + (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, + (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, + (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0); } return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit())); @@ -932,14 +977,14 @@ { switch(robustness) { - case OutOfBoundsBehavior::UndefinedBehavior: - // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes, - // but since it can't know in advance which branches are taken this must be true even for inactives lanes. 
- return true; - case OutOfBoundsBehavior::Nullify: - case OutOfBoundsBehavior::RobustBufferAccess: - case OutOfBoundsBehavior::UndefinedValue: - return false; + case OutOfBoundsBehavior::UndefinedBehavior: + // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes, + // but since it can't know in advance which branches are taken this must be true even for inactives lanes. + return true; + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: + case OutOfBoundsBehavior::UndefinedValue: + return false; } } } @@ -968,7 +1013,7 @@ { auto o = offsets(); static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4"); - return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 2*step, 3*step, 0))) == 0; + return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1 * step, 2 * step, 3 * step, 0))) == 0; } return hasStaticSequentialOffsets(step); } @@ -983,7 +1028,7 @@ } for(int i = 1; i < SIMD::Width; i++) { - if(staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; } + if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i]) { return false; } } return true; } @@ -1010,7 +1055,7 @@ } for(int i = 1; i < SIMD::Width; i++) { - if(staticOffsets[i-1] != staticOffsets[i]) { return false; } + if(staticOffsets[i - 1] != staticOffsets[i]) { return false; } } return true; }
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp index 4026d27..a911be2 100644 --- a/src/Pipeline/ShaderCore.hpp +++ b/src/Pipeline/ShaderCore.hpp
@@ -19,9 +19,9 @@ #include "Reactor/Reactor.hpp" #include "Vulkan/VkDebug.hpp" -#include <atomic> // std::memory_order #include <array> -#include <utility> // std::pair +#include <atomic> // std::memory_order +#include <utility> // std::pair namespace sw { @@ -87,17 +87,17 @@ Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset); Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset); - Pointer& operator += (Int i); - Pointer& operator *= (Int i); + Pointer &operator+=(Int i); + Pointer &operator*=(Int i); - Pointer operator + (SIMD::Int i); - Pointer operator * (SIMD::Int i); + Pointer operator+(SIMD::Int i); + Pointer operator*(SIMD::Int i); - Pointer& operator += (int i); - Pointer& operator *= (int i); + Pointer &operator+=(int i); + Pointer &operator*=(int i); - Pointer operator + (int i); - Pointer operator * (int i); + Pointer operator+(int i); + Pointer operator*(int i); SIMD::Int offsets() const; @@ -135,23 +135,37 @@ rr::Pointer<rr::Byte> base; // Upper (non-inclusive) limit for offsets from base. - rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero. + rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero. unsigned int staticLimit; // Per lane offsets from base. - SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero. + SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero. std::array<int32_t, SIMD::Width> staticOffsets; bool hasDynamicLimit; // True if dynamicLimit is non-zero. bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero. 
}; -template <typename T> struct Element {}; -template <> struct Element<Float> { using type = rr::Float; }; -template <> struct Element<Int> { using type = rr::Int; }; -template <> struct Element<UInt> { using type = rr::UInt; }; +template<typename T> +struct Element +{}; +template<> +struct Element<Float> +{ + using type = rr::Float; +}; +template<> +struct Element<Int> +{ + using type = rr::Int; +}; +template<> +struct Element<UInt> +{ + using type = rr::UInt; +}; -} // namespace SIMD +} // namespace SIMD Float4 exponential2(RValue<Float4> x, bool pp = false); Float4 logarithm2(RValue<Float4> x, bool pp = false); @@ -161,8 +175,8 @@ Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false); Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); Float4 modulo(RValue<Float4> x, RValue<Float4> y); -Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range -Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range +Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range +Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range Float4 sine(RValue<Float4> x, bool pp = false); Float4 cosine(RValue<Float4> x, bool pp = false); Float4 tangent(RValue<Float4> x, bool pp = false); @@ -199,10 +213,10 @@ rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints); -template <typename T> +template<typename T> inline rr::RValue<T> AndAll(rr::RValue<T> const &mask); -template <typename T> +template<typename T> inline rr::RValue<T> OrAll(rr::RValue<T> const &mask); rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val); @@ -224,9 +238,9 @@ // Performs a fused-multiply add, returning a * b + c. 
rr::RValue<sw::SIMD::Float> FMA( - rr::RValue<sw::SIMD::Float> const &a, - rr::RValue<sw::SIMD::Float> const &b, - rr::RValue<sw::SIMD::Float> const &c); + rr::RValue<sw::SIMD::Float> const &a, + rr::RValue<sw::SIMD::Float> const &b, + rr::RValue<sw::SIMD::Float> const &c); // Returns the exponent of the floating point number f. // Assumes IEEE 754 @@ -244,39 +258,39 @@ // Returns the determinant of a 2x2 matrix. rr::RValue<sw::SIMD::Float> Determinant( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, - rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d); + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, + rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d); // Returns the determinant of a 3x3 matrix. rr::RValue<sw::SIMD::Float> Determinant( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, - rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, - rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i); + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, + rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, + rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i); // Returns the determinant of a 4x4 matrix. 
rr::RValue<sw::SIMD::Float> Determinant( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, - rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, - rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, - rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p); + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, + rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, + rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, + rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p); // Returns the inverse of a 2x2 matrix. std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, - rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d); + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, + rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d); // Returns the inverse of a 3x3 matrix. 
std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, - rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, - rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i); + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, + rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, + rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i); // Returns the inverse of a 4x4 matrix. std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse( - rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, - rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, - rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, - rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p); + rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, + rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, + rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, + rr::RValue<sw::SIMD::Float> const &m, 
rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p); //////////////////////////////////////////////////////////////////////////// // Inline functions @@ -307,14 +321,14 @@ { switch(robustness) { - case OutOfBoundsBehavior::Nullify: - case OutOfBoundsBehavior::RobustBufferAccess: - case OutOfBoundsBehavior::UndefinedValue: - mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads. - break; - case OutOfBoundsBehavior::UndefinedBehavior: - // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. - break; + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: + case OutOfBoundsBehavior::UndefinedValue: + mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads. + break; + case OutOfBoundsBehavior::UndefinedBehavior: + // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. + break; } } @@ -339,14 +353,14 @@ bool zeroMaskedLanes = true; switch(robustness) { - case OutOfBoundsBehavior::Nullify: - case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero. - zeroMaskedLanes = true; - break; - case OutOfBoundsBehavior::UndefinedValue: - case OutOfBoundsBehavior::UndefinedBehavior: - zeroMaskedLanes = false; - break; + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero. + zeroMaskedLanes = true; + break; + case OutOfBoundsBehavior::UndefinedValue: + case OutOfBoundsBehavior::UndefinedBehavior: + zeroMaskedLanes = false; + break; } if(hasStaticSequentialOffsets(sizeof(float))) @@ -399,14 +413,14 @@ switch(robustness) { - case OutOfBoundsBehavior::Nullify: - case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking. - case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. 
Treat as robust buffer access. - mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes. - break; - case OutOfBoundsBehavior::UndefinedBehavior: - // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. - break; + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking. + case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access. + mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes. + break; + case OutOfBoundsBehavior::UndefinedBehavior: + // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. + break; } if(!atomic && order == std::memory_order_relaxed) @@ -420,9 +434,9 @@ auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx)); auto maskedVal = As<SIMD::Int>(val) & elect; auto scalarVal = Extract(maskedVal, 0) | - Extract(maskedVal, 1) | - Extract(maskedVal, 2) | - Extract(maskedVal, 3); + Extract(maskedVal, 1) | + Extract(maskedVal, 2) | + Extract(maskedVal, 3); *rr::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal); } } @@ -476,29 +490,30 @@ Store(T(val), robustness, mask, atomic, order); } -template <typename T> +template<typename T> inline rr::RValue<T> AndAll(rr::RValue<T> const &mask) { - T v1 = mask; // [x] [y] [z] [w] - T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw] - return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] + T v1 = mask; // [x] [y] [z] [w] + T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw] + return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] } -template <typename T> +template<typename T> inline rr::RValue<T> OrAll(rr::RValue<T> const &mask) { - T v1 = mask; // [x] [y] [z] [w] - T v2 = v1.xzxz | v1.ywyw; // [xy] [zw] [xy] [zw] - return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] + T v1 = mask; // [x] [y] [z] [w] + T v2 = v1.xzxz | 
v1.ywyw; // [xy] [zw] [xy] [zw] + return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] } -} // namespace sw +} // namespace sw #ifdef ENABLE_RR_PRINT namespace rr { -template <> struct PrintValue::Ty<sw::Vector4f> +template<> +struct PrintValue::Ty<sw::Vector4f> { - static std::string fmt(const sw::Vector4f& v) + static std::string fmt(const sw::Vector4f &v) { return "[x: " + PrintValue::fmt(v.x) + ", y: " + PrintValue::fmt(v.y) + @@ -506,14 +521,15 @@ ", w: " + PrintValue::fmt(v.w) + "]"; } - static std::vector<rr::Value*> val(const sw::Vector4f& v) + static std::vector<rr::Value *> val(const sw::Vector4f &v) { return PrintValue::vals(v.x, v.y, v.z, v.w); } }; -template <> struct PrintValue::Ty<sw::Vector4s> +template<> +struct PrintValue::Ty<sw::Vector4s> { - static std::string fmt(const sw::Vector4s& v) + static std::string fmt(const sw::Vector4s &v) { return "[x: " + PrintValue::fmt(v.x) + ", y: " + PrintValue::fmt(v.y) + @@ -521,13 +537,13 @@ ", w: " + PrintValue::fmt(v.w) + "]"; } - static std::vector<rr::Value*> val(const sw::Vector4s& v) + static std::vector<rr::Value *> val(const sw::Vector4s &v) { return PrintValue::vals(v.x, v.y, v.z, v.w); } }; -} // namespace sw -#endif // ENABLE_RR_PRINT +} // namespace rr +#endif // ENABLE_RR_PRINT -#endif // sw_ShaderCore_hpp +#endif // sw_ShaderCore_hpp
diff --git a/src/Pipeline/SpirvID.hpp b/src/Pipeline/SpirvID.hpp index 1f4624d..35b910a 100644 --- a/src/Pipeline/SpirvID.hpp +++ b/src/Pipeline/SpirvID.hpp
@@ -15,8 +15,8 @@ #ifndef sw_ID_hpp #define sw_ID_hpp -#include <unordered_map> #include <cstdint> +#include <unordered_map> namespace sw { @@ -25,39 +25,43 @@ // ID; instead it is used to prevent implicit casts between identifiers of // different T types. // IDs are typically used as a map key to value of type T. -template <typename T> +template<typename T> class SpirvID { public: - SpirvID() : id(0) {} - SpirvID(uint32_t id) : id(id) {} - bool operator == (const SpirvID<T>& rhs) const { return id == rhs.id; } - bool operator != (const SpirvID<T>& rhs) const { return id != rhs.id; } - bool operator < (const SpirvID<T>& rhs) const { return id < rhs.id; } + SpirvID() + : id(0) + {} + SpirvID(uint32_t id) + : id(id) + {} + bool operator==(const SpirvID<T> &rhs) const { return id == rhs.id; } + bool operator!=(const SpirvID<T> &rhs) const { return id != rhs.id; } + bool operator<(const SpirvID<T> &rhs) const { return id < rhs.id; } // value returns the numerical value of the identifier. uint32_t value() const { return id; } + private: uint32_t id; }; // HandleMap<T> is an unordered map of SpirvID<T> to T. -template <typename T> +template<typename T> using HandleMap = std::unordered_map<SpirvID<T>, T>; -} +} // namespace sw -namespace std -{ +namespace std { // std::hash implementation for sw::SpirvID<T> template<typename T> -struct hash< sw::SpirvID<T> > +struct hash<sw::SpirvID<T> > { - std::size_t operator()(const sw::SpirvID<T>& id) const noexcept + std::size_t operator()(const sw::SpirvID<T> &id) const noexcept { return std::hash<uint32_t>()(id.value()); } }; -} // namespace sw +} // namespace std #endif // sw_ID_hpp
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp index 11b3770..2350195 100644 --- a/src/Pipeline/SpirvShader.cpp +++ b/src/Pipeline/SpirvShader.cpp
@@ -23,17 +23,18 @@ namespace sw { SpirvShader::SpirvShader( - uint32_t codeSerialID, - VkShaderStageFlagBits pipelineStage, - const char *entryPointName, - InsnStore const &insns, - const vk::RenderPass *renderPass, - uint32_t subpassIndex, - bool robustBufferAccess) - : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS}, - outputs{MAX_INTERFACE_COMPONENTS}, - codeSerialID(codeSerialID), - robustBufferAccess(robustBufferAccess) + uint32_t codeSerialID, + VkShaderStageFlagBits pipelineStage, + const char *entryPointName, + InsnStore const &insns, + const vk::RenderPass *renderPass, + uint32_t subpassIndex, + bool robustBufferAccess) + : insns{ insns } + , inputs{ MAX_INTERFACE_COMPONENTS } + , outputs{ MAX_INTERFACE_COMPONENTS } + , codeSerialID(codeSerialID) + , robustBufferAccess(robustBufferAccess) { ASSERT(insns.size() > 0); @@ -46,7 +47,8 @@ { auto attachmentIndex = subpass.pInputAttachments[i].attachment; inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED - ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED); + ? renderPass->getAttachment(attachmentIndex).format + : VK_FORMAT_UNDEFINED); } } @@ -63,403 +65,404 @@ switch(opcode) { - case spv::OpEntryPoint: - { - executionModel = spv::ExecutionModel(insn.word(1)); - auto id = Function::ID(insn.word(2)); - auto name = insn.string(3); - auto stage = executionModelToStage(executionModel); - if(stage == pipelineStage && strcmp(name, entryPointName) == 0) + case spv::OpEntryPoint: { - ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage)); - entryPoint = id; - } - break; - } - - case spv::OpExecutionMode: - ProcessExecutionMode(insn); - break; - - case spv::OpDecorate: - { - TypeOrObjectID targetId = insn.word(1); - auto decoration = static_cast<spv::Decoration>(insn.word(2)); - uint32_t value = insn.wordCount() > 3 ? 
insn.word(3) : 0; - - decorations[targetId].Apply(decoration, value); - - switch(decoration) - { - case spv::DecorationDescriptorSet: - descriptorDecorations[targetId].DescriptorSet = value; - break; - case spv::DecorationBinding: - descriptorDecorations[targetId].Binding = value; - break; - case spv::DecorationInputAttachmentIndex: - descriptorDecorations[targetId].InputAttachmentIndex = value; - break; - default: - // Only handling descriptor decorations here. + executionModel = spv::ExecutionModel(insn.word(1)); + auto id = Function::ID(insn.word(2)); + auto name = insn.string(3); + auto stage = executionModelToStage(executionModel); + if(stage == pipelineStage && strcmp(name, entryPointName) == 0) + { + ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage)); + entryPoint = id; + } break; } - if(decoration == spv::DecorationCentroid) - modes.NeedsCentroid = true; - break; - } + case spv::OpExecutionMode: + ProcessExecutionMode(insn); + break; - case spv::OpMemberDecorate: - { - Type::ID targetId = insn.word(1); - auto memberIndex = insn.word(2); - auto decoration = static_cast<spv::Decoration>(insn.word(3)); - uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0; - - auto &d = memberDecorations[targetId]; - if(memberIndex >= d.size()) - d.resize(memberIndex + 1); // on demand; exact size would require another pass... - - d[memberIndex].Apply(decoration, value); - - if(decoration == spv::DecorationCentroid) - modes.NeedsCentroid = true; - break; - } - - case spv::OpDecorationGroup: - // Nothing to do here. We don't need to record the definition of the group; we'll just have - // the bundle of decorations float around. If we were to ever walk the decorations directly, - // we might think about introducing this as a real Object. 
- break; - - case spv::OpGroupDecorate: - { - uint32_t group = insn.word(1); - auto const &groupDecorations = decorations[group]; - auto const &descriptorGroupDecorations = descriptorDecorations[group]; - for(auto i = 2u; i < insn.wordCount(); i++) + case spv::OpDecorate: { - // Remaining operands are targets to apply the group to. - uint32_t target = insn.word(i); - decorations[target].Apply(groupDecorations); - descriptorDecorations[target].Apply(descriptorGroupDecorations); + TypeOrObjectID targetId = insn.word(1); + auto decoration = static_cast<spv::Decoration>(insn.word(2)); + uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0; + + decorations[targetId].Apply(decoration, value); + + switch(decoration) + { + case spv::DecorationDescriptorSet: + descriptorDecorations[targetId].DescriptorSet = value; + break; + case spv::DecorationBinding: + descriptorDecorations[targetId].Binding = value; + break; + case spv::DecorationInputAttachmentIndex: + descriptorDecorations[targetId].InputAttachmentIndex = value; + break; + default: + // Only handling descriptor decorations here. + break; + } + + if(decoration == spv::DecorationCentroid) + modes.NeedsCentroid = true; + break; } - break; - } - - case spv::OpGroupMemberDecorate: - { - auto const &srcDecorations = decorations[insn.word(1)]; - for(auto i = 2u; i < insn.wordCount(); i += 2) + case spv::OpMemberDecorate: { - // remaining operands are pairs of <id>, literal for members to apply to. - auto &d = memberDecorations[insn.word(i)]; - auto memberIndex = insn.word(i + 1); + Type::ID targetId = insn.word(1); + auto memberIndex = insn.word(2); + auto decoration = static_cast<spv::Decoration>(insn.word(3)); + uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0; + + auto &d = memberDecorations[targetId]; if(memberIndex >= d.size()) - d.resize(memberIndex + 1); // on demand resize, see above... 
- d[memberIndex].Apply(srcDecorations); + d.resize(memberIndex + 1); // on demand; exact size would require another pass... + + d[memberIndex].Apply(decoration, value); + + if(decoration == spv::DecorationCentroid) + modes.NeedsCentroid = true; + break; } - break; - } - case spv::OpLabel: - { - ASSERT(currentBlock.value() == 0); - currentBlock = Block::ID(insn.word(1)); - blockStart = insn; - break; - } - - // Branch Instructions (subset of Termination Instructions): - case spv::OpBranch: - case spv::OpBranchConditional: - case spv::OpSwitch: - case spv::OpReturn: - // fallthrough - - // Termination instruction: - case spv::OpKill: - case spv::OpUnreachable: - { - ASSERT(currentBlock.value() != 0); - ASSERT(currentFunction.value() != 0); - - auto blockEnd = insn; blockEnd++; - functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd); - currentBlock = Block::ID(0); - - if(opcode == spv::OpKill) - { - modes.ContainsKill = true; - } - break; - } - - case spv::OpLoopMerge: - case spv::OpSelectionMerge: - break; // Nothing to do in analysis pass. 
- - case spv::OpTypeVoid: - case spv::OpTypeBool: - case spv::OpTypeInt: - case spv::OpTypeFloat: - case spv::OpTypeVector: - case spv::OpTypeMatrix: - case spv::OpTypeImage: - case spv::OpTypeSampler: - case spv::OpTypeSampledImage: - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - case spv::OpTypeStruct: - case spv::OpTypePointer: - case spv::OpTypeFunction: - DeclareType(insn); - break; - - case spv::OpVariable: - { - Type::ID typeId = insn.word(1); - Object::ID resultId = insn.word(2); - auto storageClass = static_cast<spv::StorageClass>(insn.word(3)); - - auto &object = defs[resultId]; - object.kind = Object::Kind::Pointer; - object.definition = insn; - object.type = typeId; - - ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer); - ASSERT(getType(typeId).storageClass == storageClass); - - switch(storageClass) - { - case spv::StorageClassInput: - case spv::StorageClassOutput: - ProcessInterfaceVariable(object); + case spv::OpDecorationGroup: + // Nothing to do here. We don't need to record the definition of the group; we'll just have + // the bundle of decorations float around. If we were to ever walk the decorations directly, + // we might think about introducing this as a real Object. break; - case spv::StorageClassUniform: - case spv::StorageClassStorageBuffer: - object.kind = Object::Kind::DescriptorSet; + case spv::OpGroupDecorate: + { + uint32_t group = insn.word(1); + auto const &groupDecorations = decorations[group]; + auto const &descriptorGroupDecorations = descriptorDecorations[group]; + for(auto i = 2u; i < insn.wordCount(); i++) + { + // Remaining operands are targets to apply the group to. 
+ uint32_t target = insn.word(i); + decorations[target].Apply(groupDecorations); + descriptorDecorations[target].Apply(descriptorGroupDecorations); + } + + break; + } + + case spv::OpGroupMemberDecorate: + { + auto const &srcDecorations = decorations[insn.word(1)]; + for(auto i = 2u; i < insn.wordCount(); i += 2) + { + // remaining operands are pairs of <id>, literal for members to apply to. + auto &d = memberDecorations[insn.word(i)]; + auto memberIndex = insn.word(i + 1); + if(memberIndex >= d.size()) + d.resize(memberIndex + 1); // on demand resize, see above... + d[memberIndex].Apply(srcDecorations); + } + break; + } + + case spv::OpLabel: + { + ASSERT(currentBlock.value() == 0); + currentBlock = Block::ID(insn.word(1)); + blockStart = insn; + break; + } + + // Branch Instructions (subset of Termination Instructions): + case spv::OpBranch: + case spv::OpBranchConditional: + case spv::OpSwitch: + case spv::OpReturn: + // fallthrough + + // Termination instruction: + case spv::OpKill: + case spv::OpUnreachable: + { + ASSERT(currentBlock.value() != 0); + ASSERT(currentFunction.value() != 0); + + auto blockEnd = insn; + blockEnd++; + functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd); + currentBlock = Block::ID(0); + + if(opcode == spv::OpKill) + { + modes.ContainsKill = true; + } + break; + } + + case spv::OpLoopMerge: + case spv::OpSelectionMerge: + break; // Nothing to do in analysis pass. 
+ + case spv::OpTypeVoid: + case spv::OpTypeBool: + case spv::OpTypeInt: + case spv::OpTypeFloat: + case spv::OpTypeVector: + case spv::OpTypeMatrix: + case spv::OpTypeImage: + case spv::OpTypeSampler: + case spv::OpTypeSampledImage: + case spv::OpTypeArray: + case spv::OpTypeRuntimeArray: + case spv::OpTypeStruct: + case spv::OpTypePointer: + case spv::OpTypeFunction: + DeclareType(insn); break; - case spv::StorageClassPushConstant: - case spv::StorageClassPrivate: - case spv::StorageClassFunction: - case spv::StorageClassUniformConstant: - break; // Correctly handled. - - case spv::StorageClassWorkgroup: + case spv::OpVariable: { - auto &elTy = getType(getType(typeId).element); - auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)); - workgroupMemory.allocate(resultId, sizeInBytes); + Type::ID typeId = insn.word(1); + Object::ID resultId = insn.word(2); + auto storageClass = static_cast<spv::StorageClass>(insn.word(3)); + + auto &object = defs[resultId]; object.kind = Object::Kind::Pointer; - break; - } - case spv::StorageClassAtomicCounter: - case spv::StorageClassImage: - UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass); - break; + object.definition = insn; + object.type = typeId; - case spv::StorageClassCrossWorkgroup: - UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)"); - break; + ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer); + ASSERT(getType(typeId).storageClass == storageClass); - case spv::StorageClassGeneric: - UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)"); - break; - - default: - UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec. 
- break; - } - break; - } - - case spv::OpConstant: - case spv::OpSpecConstant: - CreateConstant(insn).constantValue[0] = insn.word(3); - break; - case spv::OpConstantFalse: - case spv::OpSpecConstantFalse: - CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero. - break; - case spv::OpConstantTrue: - case spv::OpSpecConstantTrue: - CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set. - break; - case spv::OpConstantNull: - case spv::OpUndef: - { - // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value. - // OpConstantNull forms a constant of arbitrary type, all zeros. - auto &object = CreateConstant(insn); - auto &objectTy = getType(object.type); - for(auto i = 0u; i < objectTy.sizeInComponents; i++) - { - object.constantValue[i] = 0; - } - break; - } - case spv::OpConstantComposite: - case spv::OpSpecConstantComposite: - { - auto &object = CreateConstant(insn); - auto offset = 0u; - for(auto i = 0u; i < insn.wordCount() - 3; i++) - { - auto &constituent = getObject(insn.word(i + 3)); - auto &constituentTy = getType(constituent.type); - for(auto j = 0u; j < constituentTy.sizeInComponents; j++) + switch(storageClass) { - object.constantValue[offset++] = constituent.constantValue[j]; + case spv::StorageClassInput: + case spv::StorageClassOutput: + ProcessInterfaceVariable(object); + break; + + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: + object.kind = Object::Kind::DescriptorSet; + break; + + case spv::StorageClassPushConstant: + case spv::StorageClassPrivate: + case spv::StorageClassFunction: + case spv::StorageClassUniformConstant: + break; // Correctly handled. 
+ + case spv::StorageClassWorkgroup: + { + auto &elTy = getType(getType(typeId).element); + auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)); + workgroupMemory.allocate(resultId, sizeInBytes); + object.kind = Object::Kind::Pointer; + break; + } + case spv::StorageClassAtomicCounter: + case spv::StorageClassImage: + UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass); + break; + + case spv::StorageClassCrossWorkgroup: + UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)"); + break; + + case spv::StorageClassGeneric: + UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)"); + break; + + default: + UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec. + break; } + break; } - auto objectId = Object::ID(insn.word(2)); - auto decorationsIt = decorations.find(objectId); - if(decorationsIt != decorations.end() && - decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize) + case spv::OpConstant: + case spv::OpSpecConstant: + CreateConstant(insn).constantValue[0] = insn.word(3); + break; + case spv::OpConstantFalse: + case spv::OpSpecConstantFalse: + CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero. + break; + case spv::OpConstantTrue: + case spv::OpSpecConstantTrue: + CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set. + break; + case spv::OpConstantNull: + case spv::OpUndef: { - // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables : - // Decorating an object with the WorkgroupSize built-in - // decoration will make that object contain the dimensions - // of a local workgroup. If an object is decorated with the - // WorkgroupSize decoration, this must take precedence over - // any execution mode set for LocalSize. - // The object decorated with WorkgroupSize must be declared - // as a three-component vector of 32-bit integers. 
- ASSERT(getType(object.type).sizeInComponents == 3); - modes.WorkgroupSizeX = object.constantValue[0]; - modes.WorkgroupSizeY = object.constantValue[1]; - modes.WorkgroupSizeZ = object.constantValue[2]; - } - break; - } - case spv::OpSpecConstantOp: - EvalSpecConstantOp(insn); - break; - - case spv::OpCapability: - { - auto capability = static_cast<spv::Capability>(insn.word(1)); - switch(capability) - { - case spv::CapabilityMatrix: capabilities.Matrix = true; break; - case spv::CapabilityShader: capabilities.Shader = true; break; - case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break; - case spv::CapabilityCullDistance: capabilities.CullDistance = true; break; - case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break; - case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break; - case spv::CapabilityImage1D: capabilities.Image1D = true; break; - case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break; - case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break; - case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break; - case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break; - case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break; - case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break; - case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break; - case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break; - case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break; - case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break; - case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break; - case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break; - case 
spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break; - case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break; - case spv::CapabilityMultiView: capabilities.MultiView = true; break; - default: - UNSUPPORTED("Unsupported capability %u", insn.word(1)); - } - break; // Various capabilities will be declared, but none affect our code generation at this point. - } - - case spv::OpMemoryModel: - break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support. - - case spv::OpFunction: - { - auto functionId = Function::ID(insn.word(2)); - ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value()); - currentFunction = functionId; - auto &function = functions[functionId]; - function.result = Type::ID(insn.word(1)); - function.type = Type::ID(insn.word(4)); - // Scan forward to find the function's label. - for(auto it = insn; it != end() && function.entry == 0; it++) - { - switch(it.opcode()) + // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value. + // OpConstantNull forms a constant of arbitrary type, all zeros. + auto &object = CreateConstant(insn); + auto &objectTy = getType(object.type); + for(auto i = 0u; i < objectTy.sizeInComponents; i++) { - case spv::OpFunction: - case spv::OpFunctionParameter: - break; - case spv::OpLabel: - function.entry = Block::ID(it.word(1)); - break; - default: - WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str()); + object.constantValue[i] = 0; } + break; } - ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value()); - break; - } - - case spv::OpFunctionEnd: - currentFunction = 0; - break; - - case spv::OpExtInstImport: - { - // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it. 
- // Valid shaders will not attempt to import any other instruction sets. - auto ext = insn.string(2); - if(0 != strcmp("GLSL.std.450", ext)) + case spv::OpConstantComposite: + case spv::OpSpecConstantComposite: { - UNSUPPORTED("SPIR-V Extension: %s", ext); + auto &object = CreateConstant(insn); + auto offset = 0u; + for(auto i = 0u; i < insn.wordCount() - 3; i++) + { + auto &constituent = getObject(insn.word(i + 3)); + auto &constituentTy = getType(constituent.type); + for(auto j = 0u; j < constituentTy.sizeInComponents; j++) + { + object.constantValue[offset++] = constituent.constantValue[j]; + } + } + + auto objectId = Object::ID(insn.word(2)); + auto decorationsIt = decorations.find(objectId); + if(decorationsIt != decorations.end() && + decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize) + { + // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables : + // Decorating an object with the WorkgroupSize built-in + // decoration will make that object contain the dimensions + // of a local workgroup. If an object is decorated with the + // WorkgroupSize decoration, this must take precedence over + // any execution mode set for LocalSize. + // The object decorated with WorkgroupSize must be declared + // as a three-component vector of 32-bit integers. + ASSERT(getType(object.type).sizeInComponents == 3); + modes.WorkgroupSizeX = object.constantValue[0]; + modes.WorkgroupSizeY = object.constantValue[1]; + modes.WorkgroupSizeZ = object.constantValue[2]; + } + break; } - break; - } - case spv::OpName: - case spv::OpMemberName: - case spv::OpSource: - case spv::OpSourceContinued: - case spv::OpSourceExtension: - case spv::OpLine: - case spv::OpNoLine: - case spv::OpModuleProcessed: - case spv::OpString: - // No semantic impact - break; + case spv::OpSpecConstantOp: + EvalSpecConstantOp(insn); + break; - case spv::OpFunctionParameter: - // These should have all been removed by preprocessing passes. 
If we see them here, - // our assumptions are wrong and we will probably generate wrong code. - UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str()); - break; + case spv::OpCapability: + { + auto capability = static_cast<spv::Capability>(insn.word(1)); + switch(capability) + { + case spv::CapabilityMatrix: capabilities.Matrix = true; break; + case spv::CapabilityShader: capabilities.Shader = true; break; + case spv::CapabilityClipDistance: capabilities.ClipDistance = true; break; + case spv::CapabilityCullDistance: capabilities.CullDistance = true; break; + case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break; + case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break; + case spv::CapabilityImage1D: capabilities.Image1D = true; break; + case spv::CapabilityImageCubeArray: capabilities.ImageCubeArray = true; break; + case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break; + case spv::CapabilitySampledCubeArray: capabilities.SampledCubeArray = true; break; + case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break; + case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break; + case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break; + case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break; + case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break; + case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break; + case spv::CapabilityGroupNonUniformArithmetic: capabilities.GroupNonUniformArithmetic = true; break; + case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break; + case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break; + case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break; + case 
spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break; + case spv::CapabilityMultiView: capabilities.MultiView = true; break; + default: + UNSUPPORTED("Unsupported capability %u", insn.word(1)); + } + break; // Various capabilities will be declared, but none affect our code generation at this point. + } - case spv::OpFunctionCall: - // TODO(b/141246700): Add full support for spv::OpFunctionCall - break; + case spv::OpMemoryModel: + break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support. - case spv::OpFConvert: - UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)"); - break; + case spv::OpFunction: + { + auto functionId = Function::ID(insn.word(2)); + ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value()); + currentFunction = functionId; + auto &function = functions[functionId]; + function.result = Type::ID(insn.word(1)); + function.type = Type::ID(insn.word(4)); + // Scan forward to find the function's label. + for(auto it = insn; it != end() && function.entry == 0; it++) + { + switch(it.opcode()) + { + case spv::OpFunction: + case spv::OpFunctionParameter: + break; + case spv::OpLabel: + function.entry = Block::ID(it.word(1)); + break; + default: + WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str()); + } + } + ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value()); + break; + } - case spv::OpSConvert: - UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)"); - break; + case spv::OpFunctionEnd: + currentFunction = 0; + break; - case spv::OpUConvert: - UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)"); - break; + case spv::OpExtInstImport: + { + // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it. + // Valid shaders will not attempt to import any other instruction sets. 
+ auto ext = insn.string(2); + if(0 != strcmp("GLSL.std.450", ext)) + { + UNSUPPORTED("SPIR-V Extension: %s", ext); + } + break; + } + case spv::OpName: + case spv::OpMemberName: + case spv::OpSource: + case spv::OpSourceContinued: + case spv::OpSourceExtension: + case spv::OpLine: + case spv::OpNoLine: + case spv::OpModuleProcessed: + case spv::OpString: + // No semantic impact + break; - case spv::OpLoad: - case spv::OpAccessChain: - case spv::OpInBoundsAccessChain: - case spv::OpSampledImage: - case spv::OpImage: + case spv::OpFunctionParameter: + // These should have all been removed by preprocessing passes. If we see them here, + // our assumptions are wrong and we will probably generate wrong code. + UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str()); + break; + + case spv::OpFunctionCall: + // TODO(b/141246700): Add full support for spv::OpFunctionCall + break; + + case spv::OpFConvert: + UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)"); + break; + + case spv::OpSConvert: + UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)"); + break; + + case spv::OpUConvert: + UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)"); + break; + + case spv::OpLoad: + case spv::OpAccessChain: + case spv::OpInBoundsAccessChain: + case spv::OpSampledImage: + case spv::OpImage: { // Propagate the descriptor decorations to the result. 
Object::ID resultId = insn.word(2); @@ -484,202 +487,202 @@ } break; - case spv::OpCompositeConstruct: - case spv::OpCompositeInsert: - case spv::OpCompositeExtract: - case spv::OpVectorShuffle: - case spv::OpVectorTimesScalar: - case spv::OpMatrixTimesScalar: - case spv::OpMatrixTimesVector: - case spv::OpVectorTimesMatrix: - case spv::OpMatrixTimesMatrix: - case spv::OpOuterProduct: - case spv::OpTranspose: - case spv::OpVectorExtractDynamic: - case spv::OpVectorInsertDynamic: - // Unary ops - case spv::OpNot: - case spv::OpBitFieldInsert: - case spv::OpBitFieldSExtract: - case spv::OpBitFieldUExtract: - case spv::OpBitReverse: - case spv::OpBitCount: - case spv::OpSNegate: - case spv::OpFNegate: - case spv::OpLogicalNot: - case spv::OpQuantizeToF16: - // Binary ops - case spv::OpIAdd: - case spv::OpISub: - case spv::OpIMul: - case spv::OpSDiv: - case spv::OpUDiv: - case spv::OpFAdd: - case spv::OpFSub: - case spv::OpFMul: - case spv::OpFDiv: - case spv::OpFMod: - case spv::OpFRem: - case spv::OpFOrdEqual: - case spv::OpFUnordEqual: - case spv::OpFOrdNotEqual: - case spv::OpFUnordNotEqual: - case spv::OpFOrdLessThan: - case spv::OpFUnordLessThan: - case spv::OpFOrdGreaterThan: - case spv::OpFUnordGreaterThan: - case spv::OpFOrdLessThanEqual: - case spv::OpFUnordLessThanEqual: - case spv::OpFOrdGreaterThanEqual: - case spv::OpFUnordGreaterThanEqual: - case spv::OpSMod: - case spv::OpSRem: - case spv::OpUMod: - case spv::OpIEqual: - case spv::OpINotEqual: - case spv::OpUGreaterThan: - case spv::OpSGreaterThan: - case spv::OpUGreaterThanEqual: - case spv::OpSGreaterThanEqual: - case spv::OpULessThan: - case spv::OpSLessThan: - case spv::OpULessThanEqual: - case spv::OpSLessThanEqual: - case spv::OpShiftRightLogical: - case spv::OpShiftRightArithmetic: - case spv::OpShiftLeftLogical: - case spv::OpBitwiseOr: - case spv::OpBitwiseXor: - case spv::OpBitwiseAnd: - case spv::OpLogicalOr: - case spv::OpLogicalAnd: - case spv::OpLogicalEqual: - case spv::OpLogicalNotEqual: 
- case spv::OpUMulExtended: - case spv::OpSMulExtended: - case spv::OpIAddCarry: - case spv::OpISubBorrow: - case spv::OpDot: - case spv::OpConvertFToU: - case spv::OpConvertFToS: - case spv::OpConvertSToF: - case spv::OpConvertUToF: - case spv::OpBitcast: - case spv::OpSelect: - case spv::OpExtInst: - case spv::OpIsInf: - case spv::OpIsNan: - case spv::OpAny: - case spv::OpAll: - case spv::OpDPdx: - case spv::OpDPdxCoarse: - case spv::OpDPdy: - case spv::OpDPdyCoarse: - case spv::OpFwidth: - case spv::OpFwidthCoarse: - case spv::OpDPdxFine: - case spv::OpDPdyFine: - case spv::OpFwidthFine: - case spv::OpAtomicLoad: - case spv::OpAtomicIAdd: - case spv::OpAtomicISub: - case spv::OpAtomicSMin: - case spv::OpAtomicSMax: - case spv::OpAtomicUMin: - case spv::OpAtomicUMax: - case spv::OpAtomicAnd: - case spv::OpAtomicOr: - case spv::OpAtomicXor: - case spv::OpAtomicIIncrement: - case spv::OpAtomicIDecrement: - case spv::OpAtomicExchange: - case spv::OpAtomicCompareExchange: - case spv::OpPhi: - case spv::OpImageSampleImplicitLod: - case spv::OpImageSampleExplicitLod: - case spv::OpImageSampleDrefImplicitLod: - case spv::OpImageSampleDrefExplicitLod: - case spv::OpImageSampleProjImplicitLod: - case spv::OpImageSampleProjExplicitLod: - case spv::OpImageSampleProjDrefImplicitLod: - case spv::OpImageSampleProjDrefExplicitLod: - case spv::OpImageGather: - case spv::OpImageDrefGather: - case spv::OpImageFetch: - case spv::OpImageQuerySizeLod: - case spv::OpImageQuerySize: - case spv::OpImageQueryLod: - case spv::OpImageQueryLevels: - case spv::OpImageQuerySamples: - case spv::OpImageRead: - case spv::OpImageTexelPointer: - case spv::OpGroupNonUniformElect: - case spv::OpGroupNonUniformAll: - case spv::OpGroupNonUniformAny: - case spv::OpGroupNonUniformAllEqual: - case spv::OpGroupNonUniformBroadcast: - case spv::OpGroupNonUniformBroadcastFirst: - case spv::OpGroupNonUniformBallot: - case spv::OpGroupNonUniformInverseBallot: - case spv::OpGroupNonUniformBallotBitExtract: - 
case spv::OpGroupNonUniformBallotBitCount: - case spv::OpGroupNonUniformBallotFindLSB: - case spv::OpGroupNonUniformBallotFindMSB: - case spv::OpGroupNonUniformShuffle: - case spv::OpGroupNonUniformShuffleXor: - case spv::OpGroupNonUniformShuffleUp: - case spv::OpGroupNonUniformShuffleDown: - case spv::OpGroupNonUniformIAdd: - case spv::OpGroupNonUniformFAdd: - case spv::OpGroupNonUniformIMul: - case spv::OpGroupNonUniformFMul: - case spv::OpGroupNonUniformSMin: - case spv::OpGroupNonUniformUMin: - case spv::OpGroupNonUniformFMin: - case spv::OpGroupNonUniformSMax: - case spv::OpGroupNonUniformUMax: - case spv::OpGroupNonUniformFMax: - case spv::OpGroupNonUniformBitwiseAnd: - case spv::OpGroupNonUniformBitwiseOr: - case spv::OpGroupNonUniformBitwiseXor: - case spv::OpGroupNonUniformLogicalAnd: - case spv::OpGroupNonUniformLogicalOr: - case spv::OpGroupNonUniformLogicalXor: - case spv::OpCopyObject: - case spv::OpArrayLength: - // Instructions that yield an intermediate value or divergent pointer - DefineResult(insn); - break; + case spv::OpCompositeConstruct: + case spv::OpCompositeInsert: + case spv::OpCompositeExtract: + case spv::OpVectorShuffle: + case spv::OpVectorTimesScalar: + case spv::OpMatrixTimesScalar: + case spv::OpMatrixTimesVector: + case spv::OpVectorTimesMatrix: + case spv::OpMatrixTimesMatrix: + case spv::OpOuterProduct: + case spv::OpTranspose: + case spv::OpVectorExtractDynamic: + case spv::OpVectorInsertDynamic: + // Unary ops + case spv::OpNot: + case spv::OpBitFieldInsert: + case spv::OpBitFieldSExtract: + case spv::OpBitFieldUExtract: + case spv::OpBitReverse: + case spv::OpBitCount: + case spv::OpSNegate: + case spv::OpFNegate: + case spv::OpLogicalNot: + case spv::OpQuantizeToF16: + // Binary ops + case spv::OpIAdd: + case spv::OpISub: + case spv::OpIMul: + case spv::OpSDiv: + case spv::OpUDiv: + case spv::OpFAdd: + case spv::OpFSub: + case spv::OpFMul: + case spv::OpFDiv: + case spv::OpFMod: + case spv::OpFRem: + case spv::OpFOrdEqual: + 
case spv::OpFUnordEqual: + case spv::OpFOrdNotEqual: + case spv::OpFUnordNotEqual: + case spv::OpFOrdLessThan: + case spv::OpFUnordLessThan: + case spv::OpFOrdGreaterThan: + case spv::OpFUnordGreaterThan: + case spv::OpFOrdLessThanEqual: + case spv::OpFUnordLessThanEqual: + case spv::OpFOrdGreaterThanEqual: + case spv::OpFUnordGreaterThanEqual: + case spv::OpSMod: + case spv::OpSRem: + case spv::OpUMod: + case spv::OpIEqual: + case spv::OpINotEqual: + case spv::OpUGreaterThan: + case spv::OpSGreaterThan: + case spv::OpUGreaterThanEqual: + case spv::OpSGreaterThanEqual: + case spv::OpULessThan: + case spv::OpSLessThan: + case spv::OpULessThanEqual: + case spv::OpSLessThanEqual: + case spv::OpShiftRightLogical: + case spv::OpShiftRightArithmetic: + case spv::OpShiftLeftLogical: + case spv::OpBitwiseOr: + case spv::OpBitwiseXor: + case spv::OpBitwiseAnd: + case spv::OpLogicalOr: + case spv::OpLogicalAnd: + case spv::OpLogicalEqual: + case spv::OpLogicalNotEqual: + case spv::OpUMulExtended: + case spv::OpSMulExtended: + case spv::OpIAddCarry: + case spv::OpISubBorrow: + case spv::OpDot: + case spv::OpConvertFToU: + case spv::OpConvertFToS: + case spv::OpConvertSToF: + case spv::OpConvertUToF: + case spv::OpBitcast: + case spv::OpSelect: + case spv::OpExtInst: + case spv::OpIsInf: + case spv::OpIsNan: + case spv::OpAny: + case spv::OpAll: + case spv::OpDPdx: + case spv::OpDPdxCoarse: + case spv::OpDPdy: + case spv::OpDPdyCoarse: + case spv::OpFwidth: + case spv::OpFwidthCoarse: + case spv::OpDPdxFine: + case spv::OpDPdyFine: + case spv::OpFwidthFine: + case spv::OpAtomicLoad: + case spv::OpAtomicIAdd: + case spv::OpAtomicISub: + case spv::OpAtomicSMin: + case spv::OpAtomicSMax: + case spv::OpAtomicUMin: + case spv::OpAtomicUMax: + case spv::OpAtomicAnd: + case spv::OpAtomicOr: + case spv::OpAtomicXor: + case spv::OpAtomicIIncrement: + case spv::OpAtomicIDecrement: + case spv::OpAtomicExchange: + case spv::OpAtomicCompareExchange: + case spv::OpPhi: + case 
spv::OpImageSampleImplicitLod: + case spv::OpImageSampleExplicitLod: + case spv::OpImageSampleDrefImplicitLod: + case spv::OpImageSampleDrefExplicitLod: + case spv::OpImageSampleProjImplicitLod: + case spv::OpImageSampleProjExplicitLod: + case spv::OpImageSampleProjDrefImplicitLod: + case spv::OpImageSampleProjDrefExplicitLod: + case spv::OpImageGather: + case spv::OpImageDrefGather: + case spv::OpImageFetch: + case spv::OpImageQuerySizeLod: + case spv::OpImageQuerySize: + case spv::OpImageQueryLod: + case spv::OpImageQueryLevels: + case spv::OpImageQuerySamples: + case spv::OpImageRead: + case spv::OpImageTexelPointer: + case spv::OpGroupNonUniformElect: + case spv::OpGroupNonUniformAll: + case spv::OpGroupNonUniformAny: + case spv::OpGroupNonUniformAllEqual: + case spv::OpGroupNonUniformBroadcast: + case spv::OpGroupNonUniformBroadcastFirst: + case spv::OpGroupNonUniformBallot: + case spv::OpGroupNonUniformInverseBallot: + case spv::OpGroupNonUniformBallotBitExtract: + case spv::OpGroupNonUniformBallotBitCount: + case spv::OpGroupNonUniformBallotFindLSB: + case spv::OpGroupNonUniformBallotFindMSB: + case spv::OpGroupNonUniformShuffle: + case spv::OpGroupNonUniformShuffleXor: + case spv::OpGroupNonUniformShuffleUp: + case spv::OpGroupNonUniformShuffleDown: + case spv::OpGroupNonUniformIAdd: + case spv::OpGroupNonUniformFAdd: + case spv::OpGroupNonUniformIMul: + case spv::OpGroupNonUniformFMul: + case spv::OpGroupNonUniformSMin: + case spv::OpGroupNonUniformUMin: + case spv::OpGroupNonUniformFMin: + case spv::OpGroupNonUniformSMax: + case spv::OpGroupNonUniformUMax: + case spv::OpGroupNonUniformFMax: + case spv::OpGroupNonUniformBitwiseAnd: + case spv::OpGroupNonUniformBitwiseOr: + case spv::OpGroupNonUniformBitwiseXor: + case spv::OpGroupNonUniformLogicalAnd: + case spv::OpGroupNonUniformLogicalOr: + case spv::OpGroupNonUniformLogicalXor: + case spv::OpCopyObject: + case spv::OpArrayLength: + // Instructions that yield an intermediate value or divergent pointer + 
DefineResult(insn); + break; - case spv::OpStore: - case spv::OpAtomicStore: - case spv::OpImageWrite: - case spv::OpCopyMemory: - case spv::OpMemoryBarrier: - // Don't need to do anything during analysis pass - break; + case spv::OpStore: + case spv::OpAtomicStore: + case spv::OpImageWrite: + case spv::OpCopyMemory: + case spv::OpMemoryBarrier: + // Don't need to do anything during analysis pass + break; - case spv::OpControlBarrier: - modes.ContainsControlBarriers = true; - break; + case spv::OpControlBarrier: + modes.ContainsControlBarriers = true; + break; - case spv::OpExtension: - { - auto ext = insn.string(1); - // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3 - // extension per Appendix A, `Vulkan Environment for SPIR-V`. - if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break; - if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break; - if(!strcmp(ext, "SPV_KHR_16bit_storage")) break; - if(!strcmp(ext, "SPV_KHR_variable_pointers")) break; - if(!strcmp(ext, "SPV_KHR_device_group")) break; - if(!strcmp(ext, "SPV_KHR_multiview")) break; - UNSUPPORTED("SPIR-V Extension: %s", ext); - break; - } + case spv::OpExtension: + { + auto ext = insn.string(1); + // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3 + // extension per Appendix A, `Vulkan Environment for SPIR-V`. + if(!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break; + if(!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break; + if(!strcmp(ext, "SPV_KHR_16bit_storage")) break; + if(!strcmp(ext, "SPV_KHR_variable_pointers")) break; + if(!strcmp(ext, "SPV_KHR_device_group")) break; + if(!strcmp(ext, "SPV_KHR_multiview")) break; + UNSUPPORTED("SPIR-V Extension: %s", ext); + break; + } - default: - UNIMPLEMENTED("%s", OpcodeName(opcode).c_str()); + default: + UNIMPLEMENTED("%s", OpcodeName(opcode).c_str()); } } @@ -702,45 +705,45 @@ // member. All members of such a structure are builtins. 
switch(insn.opcode()) { - case spv::OpTypeStruct: - { - auto d = memberDecorations.find(resultId); - if(d != memberDecorations.end()) + case spv::OpTypeStruct: { - for(auto &m : d->second) + auto d = memberDecorations.find(resultId); + if(d != memberDecorations.end()) { - if(m.HasBuiltIn) + for(auto &m : d->second) { - type.isBuiltInBlock = true; - break; + if(m.HasBuiltIn) + { + type.isBuiltInBlock = true; + break; + } } } + break; } - break; - } - case spv::OpTypePointer: - { - Type::ID elementTypeId = insn.word(3); - type.element = elementTypeId; - type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock; - type.storageClass = static_cast<spv::StorageClass>(insn.word(2)); - break; - } - case spv::OpTypeVector: - case spv::OpTypeMatrix: - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - { - Type::ID elementTypeId = insn.word(2); - type.element = elementTypeId; - break; - } - default: - break; + case spv::OpTypePointer: + { + Type::ID elementTypeId = insn.word(3); + type.element = elementTypeId; + type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock; + type.storageClass = static_cast<spv::StorageClass>(insn.word(2)); + break; + } + case spv::OpTypeVector: + case spv::OpTypeMatrix: + case spv::OpTypeArray: + case spv::OpTypeRuntimeArray: + { + Type::ID elementTypeId = insn.word(2); + type.element = elementTypeId; + break; + } + default: + break; } } -SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn) +SpirvShader::Object &SpirvShader::CreateConstant(InsnIterator insn) { Type::ID typeId = insn.word(1); Object::ID resultId = insn.word(2); @@ -771,7 +774,7 @@ { // walk the builtin block, registering each of its members separately. 
auto m = memberDecorations.find(objectTy.element); - ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain + ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain auto &structType = pointeeTy.definition; auto offset = 0u; auto word = 2u; @@ -781,7 +784,7 @@ if(member.HasBuiltIn) { - builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents}; + builtinInterface[member.BuiltIn] = { resultId, offset, memberType.sizeInComponents }; } offset += memberType.sizeInComponents; @@ -793,24 +796,24 @@ auto d = decorations.find(resultId); if(d != decorations.end() && d->second.HasBuiltIn) { - builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents}; + builtinInterface[d->second.BuiltIn] = { resultId, 0, pointeeTy.sizeInComponents }; } else { object.kind = Object::Kind::InterfaceVariable; VisitInterface(resultId, - [&userDefinedInterface](Decorations const &d, AttribType type) { - // Populate a single scalar slot in the interface from a collection of decorations and the intended component type. - auto scalarSlot = (d.Location << 2) | d.Component; - ASSERT(scalarSlot >= 0 && - scalarSlot < static_cast<int32_t>(userDefinedInterface.size())); + [&userDefinedInterface](Decorations const &d, AttribType type) { + // Populate a single scalar slot in the interface from a collection of decorations and the intended component type. 
+ auto scalarSlot = (d.Location << 2) | d.Component; + ASSERT(scalarSlot >= 0 && + scalarSlot < static_cast<int32_t>(userDefinedInterface.size())); - auto &slot = userDefinedInterface[scalarSlot]; - slot.Type = type; - slot.Flat = d.Flat; - slot.NoPerspective = d.NoPerspective; - slot.Centroid = d.Centroid; - }); + auto &slot = userDefinedInterface[scalarSlot]; + slot.Type = type; + slot.Flat = d.Flat; + slot.NoPerspective = d.NoPerspective; + slot.Centroid = d.Centroid; + }); } } @@ -819,31 +822,31 @@ auto mode = static_cast<spv::ExecutionMode>(insn.word(2)); switch(mode) { - case spv::ExecutionModeEarlyFragmentTests: - modes.EarlyFragmentTests = true; - break; - case spv::ExecutionModeDepthReplacing: - modes.DepthReplacing = true; - break; - case spv::ExecutionModeDepthGreater: - modes.DepthGreater = true; - break; - case spv::ExecutionModeDepthLess: - modes.DepthLess = true; - break; - case spv::ExecutionModeDepthUnchanged: - modes.DepthUnchanged = true; - break; - case spv::ExecutionModeLocalSize: - modes.WorkgroupSizeX = insn.word(3); - modes.WorkgroupSizeY = insn.word(4); - modes.WorkgroupSizeZ = insn.word(5); - break; - case spv::ExecutionModeOriginUpperLeft: - // This is always the case for a Vulkan shader. Do nothing. - break; - default: - UNREACHABLE("Execution mode: %d", int(mode)); + case spv::ExecutionModeEarlyFragmentTests: + modes.EarlyFragmentTests = true; + break; + case spv::ExecutionModeDepthReplacing: + modes.DepthReplacing = true; + break; + case spv::ExecutionModeDepthGreater: + modes.DepthGreater = true; + break; + case spv::ExecutionModeDepthLess: + modes.DepthLess = true; + break; + case spv::ExecutionModeDepthUnchanged: + modes.DepthUnchanged = true; + break; + case spv::ExecutionModeLocalSize: + modes.WorkgroupSizeX = insn.word(3); + modes.WorkgroupSizeY = insn.word(4); + modes.WorkgroupSizeZ = insn.word(5); + break; + case spv::ExecutionModeOriginUpperLeft: + // This is always the case for a Vulkan shader. Do nothing. 
+ break; + default: + UNREACHABLE("Execution mode: %d", int(mode)); } } @@ -854,54 +857,54 @@ // already been described (and so their sizes determined) switch(insn.opcode()) { - case spv::OpTypeVoid: - case spv::OpTypeSampler: - case spv::OpTypeImage: - case spv::OpTypeSampledImage: - case spv::OpTypeFunction: - case spv::OpTypeRuntimeArray: - // Objects that don't consume any space. - // Descriptor-backed objects currently only need exist at compile-time. - // Runtime arrays don't appear in places where their size would be interesting - return 0; + case spv::OpTypeVoid: + case spv::OpTypeSampler: + case spv::OpTypeImage: + case spv::OpTypeSampledImage: + case spv::OpTypeFunction: + case spv::OpTypeRuntimeArray: + // Objects that don't consume any space. + // Descriptor-backed objects currently only need exist at compile-time. + // Runtime arrays don't appear in places where their size would be interesting + return 0; - case spv::OpTypeBool: - case spv::OpTypeFloat: - case spv::OpTypeInt: - // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components, - // we might need to change this, but only 32 bit components are required for Vulkan 1.1. - return 1; + case spv::OpTypeBool: + case spv::OpTypeFloat: + case spv::OpTypeInt: + // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components, + // we might need to change this, but only 32 bit components are required for Vulkan 1.1. + return 1; - case spv::OpTypeVector: - case spv::OpTypeMatrix: - // Vectors and matrices both consume element count * element size. - return getType(insn.word(2)).sizeInComponents * insn.word(3); + case spv::OpTypeVector: + case spv::OpTypeMatrix: + // Vectors and matrices both consume element count * element size. + return getType(insn.word(2)).sizeInComponents * insn.word(3); - case spv::OpTypeArray: - { - // Element count * element size. Array sizes come from constant ids. 
- auto arraySize = GetConstScalarInt(insn.word(3)); - return getType(insn.word(2)).sizeInComponents * arraySize; - } - - case spv::OpTypeStruct: - { - uint32_t size = 0; - for(uint32_t i = 2u; i < insn.wordCount(); i++) + case spv::OpTypeArray: { - size += getType(insn.word(i)).sizeInComponents; + // Element count * element size. Array sizes come from constant ids. + auto arraySize = GetConstScalarInt(insn.word(3)); + return getType(insn.word(2)).sizeInComponents * arraySize; } - return size; - } - case spv::OpTypePointer: - // Runtime representation of a pointer is a per-lane index. - // Note: clients are expected to look through the pointer if they want the pointee size instead. - return 1; + case spv::OpTypeStruct: + { + uint32_t size = 0; + for(uint32_t i = 2u; i < insn.wordCount(); i++) + { + size += getType(insn.word(i)).sizeInComponents; + } + return size; + } - default: - UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); - return 0; + case spv::OpTypePointer: + // Runtime representation of a pointer is a per-lane index. + // Note: clients are expected to look through the pointer if they want the pointee size instead. 
+ return 1; + + default: + UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); + return 0; } } @@ -924,54 +927,54 @@ auto const &obj = getType(id); switch(obj.opcode()) { - case spv::OpTypePointer: - return VisitInterfaceInner(obj.definition.word(3), d, f); - case spv::OpTypeMatrix: - for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++) + case spv::OpTypePointer: + return VisitInterfaceInner(obj.definition.word(3), d, f); + case spv::OpTypeMatrix: + for(auto i = 0u; i < obj.definition.word(3); i++, d.Location++) + { + // consumes same components of N consecutive locations + VisitInterfaceInner(obj.definition.word(2), d, f); + } + return d.Location; + case spv::OpTypeVector: + for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++) + { + // consumes N consecutive components in the same location + VisitInterfaceInner(obj.definition.word(2), d, f); + } + return d.Location + 1; + case spv::OpTypeFloat: + f(d, ATTRIBTYPE_FLOAT); + return d.Location + 1; + case spv::OpTypeInt: + f(d, obj.definition.word(3) ? 
ATTRIBTYPE_INT : ATTRIBTYPE_UINT); + return d.Location + 1; + case spv::OpTypeBool: + f(d, ATTRIBTYPE_UINT); + return d.Location + 1; + case spv::OpTypeStruct: { - // consumes same components of N consecutive locations - VisitInterfaceInner(obj.definition.word(2), d, f); + // iterate over members, which may themselves have Location/Component decorations + for(auto i = 0u; i < obj.definition.wordCount() - 2; i++) + { + ApplyDecorationsForIdMember(&d, id, i); + d.Location = VisitInterfaceInner(obj.definition.word(i + 2), d, f); + d.Component = 0; // Implicit locations always have component=0 + } + return d.Location; } - return d.Location; - case spv::OpTypeVector: - for(auto i = 0u; i < obj.definition.word(3); i++, d.Component++) + case spv::OpTypeArray: { - // consumes N consecutive components in the same location - VisitInterfaceInner(obj.definition.word(2), d, f); + auto arraySize = GetConstScalarInt(obj.definition.word(3)); + for(auto i = 0u; i < arraySize; i++) + { + d.Location = VisitInterfaceInner(obj.definition.word(2), d, f); + } + return d.Location; } - return d.Location + 1; - case spv::OpTypeFloat: - f(d, ATTRIBTYPE_FLOAT); - return d.Location + 1; - case spv::OpTypeInt: - f(d, obj.definition.word(3) ? 
ATTRIBTYPE_INT : ATTRIBTYPE_UINT); - return d.Location + 1; - case spv::OpTypeBool: - f(d, ATTRIBTYPE_UINT); - return d.Location + 1; - case spv::OpTypeStruct: - { - // iterate over members, which may themselves have Location/Component decorations - for(auto i = 0u; i < obj.definition.wordCount() - 2; i++) - { - ApplyDecorationsForIdMember(&d, id, i); - d.Location = VisitInterfaceInner(obj.definition.word(i + 2), d, f); - d.Component = 0; // Implicit locations always have component=0 - } - return d.Location; - } - case spv::OpTypeArray: - { - auto arraySize = GetConstScalarInt(obj.definition.word(3)); - for(auto i = 0u; i < arraySize; i++) - { - d.Location = VisitInterfaceInner(obj.definition.word(2), d, f); - } - return d.Location; - } - default: - // Intentionally partial; most opcodes do not participate in type hierarchies - return 0; + default: + // Intentionally partial; most opcodes do not participate in type hierarchies + return 0; } } @@ -996,33 +999,33 @@ for(auto i = 0u; i < numIndexes; i++) { ApplyDecorationsForId(d, typeId); - auto & type = getType(typeId); + auto &type = getType(typeId); switch(type.opcode()) { - case spv::OpTypeStruct: - { - int memberIndex = GetConstScalarInt(indexIds[i]); - ApplyDecorationsForIdMember(d, typeId, memberIndex); - typeId = type.definition.word(2u + memberIndex); - break; - } - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - if(dd->InputAttachmentIndex >= 0) + case spv::OpTypeStruct: { - dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]); + int memberIndex = GetConstScalarInt(indexIds[i]); + ApplyDecorationsForIdMember(d, typeId, memberIndex); + typeId = type.definition.word(2u + memberIndex); + break; } - typeId = type.element; - break; - case spv::OpTypeVector: - typeId = type.element; - break; - case spv::OpTypeMatrix: - typeId = type.element; - d->InsideMatrix = true; - break; - default: - UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str()); + case spv::OpTypeArray: + case 
spv::OpTypeRuntimeArray: + if(dd->InputAttachmentIndex >= 0) + { + dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]); + } + typeId = type.element; + break; + case spv::OpTypeVector: + typeId = type.element; + break; + case spv::OpTypeMatrix: + typeId = type.element; + d->InsideMatrix = true; + break; + default: + UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str()); } } } @@ -1057,72 +1060,72 @@ for(auto i = 0u; i < numIndexes; i++) { - auto & type = getType(typeId); + auto &type = getType(typeId); ApplyDecorationsForId(&d, typeId); switch(type.definition.opcode()) { - case spv::OpTypeStruct: - { - int memberIndex = GetConstScalarInt(indexIds[i]); - ApplyDecorationsForIdMember(&d, typeId, memberIndex); - ASSERT(d.HasOffset); - constantOffset += d.Offset; - typeId = type.definition.word(2u + memberIndex); - break; - } - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - { - // TODO: b/127950082: Check bounds. - ASSERT(d.HasArrayStride); - auto & obj = getObject(indexIds[i]); - if(obj.kind == Object::Kind::Constant) + case spv::OpTypeStruct: { - constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]); + int memberIndex = GetConstScalarInt(indexIds[i]); + ApplyDecorationsForIdMember(&d, typeId, memberIndex); + ASSERT(d.HasOffset); + constantOffset += d.Offset; + typeId = type.definition.word(2u + memberIndex); + break; } - else + case spv::OpTypeArray: + case spv::OpTypeRuntimeArray: { - ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0); + // TODO: b/127950082: Check bounds. + ASSERT(d.HasArrayStride); + auto &obj = getObject(indexIds[i]); + if(obj.kind == Object::Kind::Constant) + { + constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]); + } + else + { + ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0); + } + typeId = type.element; + break; } - typeId = type.element; - break; - } - case spv::OpTypeMatrix: - { - // TODO: b/127950082: Check bounds. 
- ASSERT(d.HasMatrixStride); - d.InsideMatrix = true; - auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride; - auto & obj = getObject(indexIds[i]); - if(obj.kind == Object::Kind::Constant) + case spv::OpTypeMatrix: { - constantOffset += columnStride * GetConstScalarInt(indexIds[i]); + // TODO: b/127950082: Check bounds. + ASSERT(d.HasMatrixStride); + d.InsideMatrix = true; + auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride; + auto &obj = getObject(indexIds[i]); + if(obj.kind == Object::Kind::Constant) + { + constantOffset += columnStride * GetConstScalarInt(indexIds[i]); + } + else + { + ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0); + } + typeId = type.element; + break; } - else + case spv::OpTypeVector: { - ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0); + auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float)); + auto &obj = getObject(indexIds[i]); + if(obj.kind == Object::Kind::Constant) + { + constantOffset += elemStride * GetConstScalarInt(indexIds[i]); + } + else + { + ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0); + } + typeId = type.element; + break; } - typeId = type.element; - break; - } - case spv::OpTypeVector: - { - auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? 
d.MatrixStride : static_cast<int32_t>(sizeof(float)); - auto & obj = getObject(indexIds[i]); - if(obj.kind == Object::Kind::Constant) - { - constantOffset += elemStride * GetConstScalarInt(indexIds[i]); - } - else - { - ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0); - } - typeId = type.element; - break; - } - default: - UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str()); + default: + UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str()); } } @@ -1143,63 +1146,64 @@ for(auto i = 0u; i < numIndexes; i++) { - auto & type = getType(typeId); + auto &type = getType(typeId); switch(type.opcode()) { - case spv::OpTypeStruct: - { - int memberIndex = GetConstScalarInt(indexIds[i]); - int offsetIntoStruct = 0; - for(auto j = 0; j < memberIndex; j++) { - auto memberType = type.definition.word(2u + j); - offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float); - } - constantOffset += offsetIntoStruct; - typeId = type.definition.word(2u + memberIndex); - break; - } - - case spv::OpTypeVector: - case spv::OpTypeMatrix: - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - { - // TODO: b/127950082: Check bounds. - if(getType(baseObject.type).storageClass == spv::StorageClassUniformConstant) + case spv::OpTypeStruct: { - // indexing into an array of descriptors. 
- auto &obj = getObject(indexIds[i]); - if(obj.kind != Object::Kind::Constant) + int memberIndex = GetConstScalarInt(indexIds[i]); + int offsetIntoStruct = 0; + for(auto j = 0; j < memberIndex; j++) { - UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability"); + auto memberType = type.definition.word(2u + j); + offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float); } - - auto d = descriptorDecorations.at(baseId); - ASSERT(d.DescriptorSet >= 0); - ASSERT(d.Binding >= 0); - auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet); - auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding)); - ptr.base += stride * GetConstScalarInt(indexIds[i]); + constantOffset += offsetIntoStruct; + typeId = type.definition.word(2u + memberIndex); + break; } - else + + case spv::OpTypeVector: + case spv::OpTypeMatrix: + case spv::OpTypeArray: + case spv::OpTypeRuntimeArray: { - auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float)); - auto & obj = getObject(indexIds[i]); - if(obj.kind == Object::Kind::Constant) + // TODO: b/127950082: Check bounds. + if(getType(baseObject.type).storageClass == spv::StorageClassUniformConstant) { - ptr += stride * GetConstScalarInt(indexIds[i]); + // indexing into an array of descriptors. 
+ auto &obj = getObject(indexIds[i]); + if(obj.kind != Object::Kind::Constant) + { + UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability"); + } + + auto d = descriptorDecorations.at(baseId); + ASSERT(d.DescriptorSet >= 0); + ASSERT(d.Binding >= 0); + auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet); + auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding)); + ptr.base += stride * GetConstScalarInt(indexIds[i]); } else { - ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0); + auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float)); + auto &obj = getObject(indexIds[i]); + if(obj.kind == Object::Kind::Constant) + { + ptr += stride * GetConstScalarInt(indexIds[i]); + } + else + { + ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0); + } } + typeId = type.element; + break; } - typeId = type.element; - break; - } - default: - UNREACHABLE("%s", OpcodeName(type.opcode()).c_str()); + default: + UNREACHABLE("%s", OpcodeName(type.opcode()).c_str()); } } @@ -1216,35 +1220,36 @@ for(auto i = 0u; i < numIndexes; i++) { - auto & type = getType(typeId); + auto &type = getType(typeId); switch(type.opcode()) { - case spv::OpTypeStruct: - { - int memberIndex = indexes[i]; - int offsetIntoStruct = 0; - for(auto j = 0; j < memberIndex; j++) { - auto memberType = type.definition.word(2u + j); - offsetIntoStruct += getType(memberType).sizeInComponents; + case spv::OpTypeStruct: + { + int memberIndex = indexes[i]; + int offsetIntoStruct = 0; + for(auto j = 0; j < memberIndex; j++) + { + auto memberType = type.definition.word(2u + j); + offsetIntoStruct += getType(memberType).sizeInComponents; + } + componentOffset += offsetIntoStruct; + typeId = type.definition.word(2u + memberIndex); + break; } - componentOffset += offsetIntoStruct; - typeId = type.definition.word(2u + memberIndex); - break; - } - case spv::OpTypeVector: - case spv::OpTypeMatrix: - 
case spv::OpTypeArray: - { - auto elementType = type.definition.word(2); - auto stride = getType(elementType).sizeInComponents; - componentOffset += stride * indexes[i]; - typeId = elementType; - break; - } + case spv::OpTypeVector: + case spv::OpTypeMatrix: + case spv::OpTypeArray: + { + auto elementType = type.definition.word(2); + auto stride = getType(elementType).sizeInComponents; + componentOffset += stride * indexes[i]; + typeId = elementType; + break; + } - default: - UNREACHABLE("%s", OpcodeName(type.opcode()).c_str()); + default: + UNREACHABLE("%s", OpcodeName(type.opcode()).c_str()); } } @@ -1255,58 +1260,58 @@ { switch(decoration) { - case spv::DecorationLocation: - HasLocation = true; - Location = static_cast<int32_t>(arg); - break; - case spv::DecorationComponent: - HasComponent = true; - Component = arg; - break; - case spv::DecorationBuiltIn: - HasBuiltIn = true; - BuiltIn = static_cast<spv::BuiltIn>(arg); - break; - case spv::DecorationFlat: - Flat = true; - break; - case spv::DecorationNoPerspective: - NoPerspective = true; - break; - case spv::DecorationCentroid: - Centroid = true; - break; - case spv::DecorationBlock: - Block = true; - break; - case spv::DecorationBufferBlock: - BufferBlock = true; - break; - case spv::DecorationOffset: - HasOffset = true; - Offset = static_cast<int32_t>(arg); - break; - case spv::DecorationArrayStride: - HasArrayStride = true; - ArrayStride = static_cast<int32_t>(arg); - break; - case spv::DecorationMatrixStride: - HasMatrixStride = true; - MatrixStride = static_cast<int32_t>(arg); - break; - case spv::DecorationRelaxedPrecision: - RelaxedPrecision = true; - break; - case spv::DecorationRowMajor: - HasRowMajor = true; - RowMajor = true; - break; - case spv::DecorationColMajor: - HasRowMajor = true; - RowMajor = false; - default: - // Intentionally partial, there are many decorations we just don't care about. 
- break; + case spv::DecorationLocation: + HasLocation = true; + Location = static_cast<int32_t>(arg); + break; + case spv::DecorationComponent: + HasComponent = true; + Component = arg; + break; + case spv::DecorationBuiltIn: + HasBuiltIn = true; + BuiltIn = static_cast<spv::BuiltIn>(arg); + break; + case spv::DecorationFlat: + Flat = true; + break; + case spv::DecorationNoPerspective: + NoPerspective = true; + break; + case spv::DecorationCentroid: + Centroid = true; + break; + case spv::DecorationBlock: + Block = true; + break; + case spv::DecorationBufferBlock: + BufferBlock = true; + break; + case spv::DecorationOffset: + HasOffset = true; + Offset = static_cast<int32_t>(arg); + break; + case spv::DecorationArrayStride: + HasArrayStride = true; + ArrayStride = static_cast<int32_t>(arg); + break; + case spv::DecorationMatrixStride: + HasMatrixStride = true; + MatrixStride = static_cast<int32_t>(arg); + break; + case spv::DecorationRelaxedPrecision: + RelaxedPrecision = true; + break; + case spv::DecorationRowMajor: + HasRowMajor = true; + RowMajor = true; + break; + case spv::DecorationColMajor: + HasRowMajor = true; + RowMajor = false; + default: + // Intentionally partial, there are many decorations we just don't care about. + break; } } @@ -1407,15 +1412,15 @@ switch(getType(typeId).opcode()) { - case spv::OpTypePointer: - case spv::OpTypeImage: - case spv::OpTypeSampledImage: - case spv::OpTypeSampler: - object.kind = Object::Kind::Pointer; - break; + case spv::OpTypePointer: + case spv::OpTypeImage: + case spv::OpTypeSampledImage: + case spv::OpTypeSampler: + object.kind = Object::Kind::Pointer; + break; - default: - object.kind = Object::Kind::Intermediate; + default: + object.kind = Object::Kind::Intermediate; } object.definition = insn; @@ -1425,27 +1430,27 @@ { switch(storageClass) { - case spv::StorageClassUniform: - case spv::StorageClassStorageBuffer: - // Buffer resource access. robustBufferAccess feature applies. - return robustBufferAccess ? 
OutOfBoundsBehavior::RobustBufferAccess - : OutOfBoundsBehavior::UndefinedBehavior; - - case spv::StorageClassImage: - return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined" - - case spv::StorageClassInput: - if(executionModel == spv::ExecutionModelVertex) - { - // Vertex attributes follow robustBufferAccess rules. + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: + // Buffer resource access. robustBufferAccess feature applies. return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess : OutOfBoundsBehavior::UndefinedBehavior; - } - // Fall through to default case. - default: - // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain. - // TODO(b/131224163): Optimize cases statically known to be within bounds. - return OutOfBoundsBehavior::UndefinedValue; + + case spv::StorageClassImage: + return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined" + + case spv::StorageClassInput: + if(executionModel == spv::ExecutionModelVertex) + { + // Vertex attributes follow robustBufferAccess rules. + return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess + : OutOfBoundsBehavior::UndefinedBehavior; + } + // Fall through to default case. + default: + // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain. + // TODO(b/131224163): Optimize cases statically known to be within bounds. 
+ return OutOfBoundsBehavior::UndefinedValue; } return OutOfBoundsBehavior::Nullify; @@ -1459,48 +1464,48 @@ { switch(insn.opcode()) { - case spv::OpVariable: - { - Type::ID resultPointerTypeId = insn.word(1); - auto resultPointerType = getType(resultPointerTypeId); - auto pointeeType = getType(resultPointerType.element); + case spv::OpVariable: + { + Type::ID resultPointerTypeId = insn.word(1); + auto resultPointerType = getType(resultPointerTypeId); + auto pointeeType = getType(resultPointerType.element); - if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects? + if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects? + { + Object::ID resultId = insn.word(2); + routine->createVariable(resultId, pointeeType.sizeInComponents); + } + break; + } + case spv::OpPhi: + { + auto type = getType(insn.word(1)); + Object::ID resultId = insn.word(2); + routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents)); + break; + } + + case spv::OpImageDrefGather: + case spv::OpImageFetch: + case spv::OpImageGather: + case spv::OpImageQueryLod: + case spv::OpImageSampleDrefExplicitLod: + case spv::OpImageSampleDrefImplicitLod: + case spv::OpImageSampleExplicitLod: + case spv::OpImageSampleImplicitLod: + case spv::OpImageSampleProjDrefExplicitLod: + case spv::OpImageSampleProjDrefImplicitLod: + case spv::OpImageSampleProjExplicitLod: + case spv::OpImageSampleProjImplicitLod: { Object::ID resultId = insn.word(2); - routine->createVariable(resultId, pointeeType.sizeInComponents); + routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{}); + break; } - break; - } - case spv::OpPhi: - { - auto type = getType(insn.word(1)); - Object::ID resultId = insn.word(2); - routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents)); - break; - } - case spv::OpImageDrefGather: - case spv::OpImageFetch: - case spv::OpImageGather: - case spv::OpImageQueryLod: - case 
spv::OpImageSampleDrefExplicitLod: - case spv::OpImageSampleDrefImplicitLod: - case spv::OpImageSampleExplicitLod: - case spv::OpImageSampleImplicitLod: - case spv::OpImageSampleProjDrefExplicitLod: - case spv::OpImageSampleProjDrefImplicitLod: - case spv::OpImageSampleProjExplicitLod: - case spv::OpImageSampleProjImplicitLod: - { - Object::ID resultId = insn.word(2); - routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{}); - break; - } - - default: - // Nothing else produces interface variables, so can all be safely ignored. - break; + default: + // Nothing else produces interface variables, so can all be safely ignored. + break; } } } @@ -1531,13 +1536,13 @@ auto res = EmitInstruction(insn, state); switch(res) { - case EmitResult::Continue: - continue; - case EmitResult::Terminator: - break; - default: - UNREACHABLE("Unexpected EmitResult %d", int(res)); - break; + case EmitResult::Continue: + continue; + case EmitResult::Terminator: + break; + default: + UNREACHABLE("Unexpected EmitResult %d", int(res)); + break; } } } @@ -1548,365 +1553,365 @@ switch(opcode) { - case spv::OpTypeVoid: - case spv::OpTypeInt: - case spv::OpTypeFloat: - case spv::OpTypeBool: - case spv::OpTypeVector: - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - case spv::OpTypeMatrix: - case spv::OpTypeStruct: - case spv::OpTypePointer: - case spv::OpTypeFunction: - case spv::OpTypeImage: - case spv::OpTypeSampledImage: - case spv::OpTypeSampler: - case spv::OpExecutionMode: - case spv::OpMemoryModel: - case spv::OpFunction: - case spv::OpFunctionEnd: - case spv::OpConstant: - case spv::OpConstantNull: - case spv::OpConstantTrue: - case spv::OpConstantFalse: - case spv::OpConstantComposite: - case spv::OpSpecConstant: - case spv::OpSpecConstantTrue: - case spv::OpSpecConstantFalse: - case spv::OpSpecConstantComposite: - case spv::OpSpecConstantOp: - case spv::OpUndef: - case spv::OpExtension: - case spv::OpCapability: - case spv::OpEntryPoint: - case 
spv::OpExtInstImport: - case spv::OpDecorate: - case spv::OpMemberDecorate: - case spv::OpGroupDecorate: - case spv::OpGroupMemberDecorate: - case spv::OpDecorationGroup: - case spv::OpName: - case spv::OpMemberName: - case spv::OpSource: - case spv::OpSourceContinued: - case spv::OpSourceExtension: - case spv::OpLine: - case spv::OpNoLine: - case spv::OpModuleProcessed: - case spv::OpString: - // Nothing to do at emit time. These are either fully handled at analysis time, - // or don't require any work at all. - return EmitResult::Continue; + case spv::OpTypeVoid: + case spv::OpTypeInt: + case spv::OpTypeFloat: + case spv::OpTypeBool: + case spv::OpTypeVector: + case spv::OpTypeArray: + case spv::OpTypeRuntimeArray: + case spv::OpTypeMatrix: + case spv::OpTypeStruct: + case spv::OpTypePointer: + case spv::OpTypeFunction: + case spv::OpTypeImage: + case spv::OpTypeSampledImage: + case spv::OpTypeSampler: + case spv::OpExecutionMode: + case spv::OpMemoryModel: + case spv::OpFunction: + case spv::OpFunctionEnd: + case spv::OpConstant: + case spv::OpConstantNull: + case spv::OpConstantTrue: + case spv::OpConstantFalse: + case spv::OpConstantComposite: + case spv::OpSpecConstant: + case spv::OpSpecConstantTrue: + case spv::OpSpecConstantFalse: + case spv::OpSpecConstantComposite: + case spv::OpSpecConstantOp: + case spv::OpUndef: + case spv::OpExtension: + case spv::OpCapability: + case spv::OpEntryPoint: + case spv::OpExtInstImport: + case spv::OpDecorate: + case spv::OpMemberDecorate: + case spv::OpGroupDecorate: + case spv::OpGroupMemberDecorate: + case spv::OpDecorationGroup: + case spv::OpName: + case spv::OpMemberName: + case spv::OpSource: + case spv::OpSourceContinued: + case spv::OpSourceExtension: + case spv::OpLine: + case spv::OpNoLine: + case spv::OpModuleProcessed: + case spv::OpString: + // Nothing to do at emit time. These are either fully handled at analysis time, + // or don't require any work at all. 
+ return EmitResult::Continue; - case spv::OpLabel: - return EmitResult::Continue; + case spv::OpLabel: + return EmitResult::Continue; - case spv::OpVariable: - return EmitVariable(insn, state); + case spv::OpVariable: + return EmitVariable(insn, state); - case spv::OpLoad: - case spv::OpAtomicLoad: - return EmitLoad(insn, state); + case spv::OpLoad: + case spv::OpAtomicLoad: + return EmitLoad(insn, state); - case spv::OpStore: - case spv::OpAtomicStore: - return EmitStore(insn, state); + case spv::OpStore: + case spv::OpAtomicStore: + return EmitStore(insn, state); - case spv::OpAtomicIAdd: - case spv::OpAtomicISub: - case spv::OpAtomicSMin: - case spv::OpAtomicSMax: - case spv::OpAtomicUMin: - case spv::OpAtomicUMax: - case spv::OpAtomicAnd: - case spv::OpAtomicOr: - case spv::OpAtomicXor: - case spv::OpAtomicIIncrement: - case spv::OpAtomicIDecrement: - case spv::OpAtomicExchange: - return EmitAtomicOp(insn, state); + case spv::OpAtomicIAdd: + case spv::OpAtomicISub: + case spv::OpAtomicSMin: + case spv::OpAtomicSMax: + case spv::OpAtomicUMin: + case spv::OpAtomicUMax: + case spv::OpAtomicAnd: + case spv::OpAtomicOr: + case spv::OpAtomicXor: + case spv::OpAtomicIIncrement: + case spv::OpAtomicIDecrement: + case spv::OpAtomicExchange: + return EmitAtomicOp(insn, state); - case spv::OpAtomicCompareExchange: - return EmitAtomicCompareExchange(insn, state); + case spv::OpAtomicCompareExchange: + return EmitAtomicCompareExchange(insn, state); - case spv::OpAccessChain: - case spv::OpInBoundsAccessChain: - return EmitAccessChain(insn, state); + case spv::OpAccessChain: + case spv::OpInBoundsAccessChain: + return EmitAccessChain(insn, state); - case spv::OpCompositeConstruct: - return EmitCompositeConstruct(insn, state); + case spv::OpCompositeConstruct: + return EmitCompositeConstruct(insn, state); - case spv::OpCompositeInsert: - return EmitCompositeInsert(insn, state); + case spv::OpCompositeInsert: + return EmitCompositeInsert(insn, state); - case 
spv::OpCompositeExtract: - return EmitCompositeExtract(insn, state); + case spv::OpCompositeExtract: + return EmitCompositeExtract(insn, state); - case spv::OpVectorShuffle: - return EmitVectorShuffle(insn, state); + case spv::OpVectorShuffle: + return EmitVectorShuffle(insn, state); - case spv::OpVectorExtractDynamic: - return EmitVectorExtractDynamic(insn, state); + case spv::OpVectorExtractDynamic: + return EmitVectorExtractDynamic(insn, state); - case spv::OpVectorInsertDynamic: - return EmitVectorInsertDynamic(insn, state); + case spv::OpVectorInsertDynamic: + return EmitVectorInsertDynamic(insn, state); - case spv::OpVectorTimesScalar: - case spv::OpMatrixTimesScalar: - return EmitVectorTimesScalar(insn, state); + case spv::OpVectorTimesScalar: + case spv::OpMatrixTimesScalar: + return EmitVectorTimesScalar(insn, state); - case spv::OpMatrixTimesVector: - return EmitMatrixTimesVector(insn, state); + case spv::OpMatrixTimesVector: + return EmitMatrixTimesVector(insn, state); - case spv::OpVectorTimesMatrix: - return EmitVectorTimesMatrix(insn, state); + case spv::OpVectorTimesMatrix: + return EmitVectorTimesMatrix(insn, state); - case spv::OpMatrixTimesMatrix: - return EmitMatrixTimesMatrix(insn, state); + case spv::OpMatrixTimesMatrix: + return EmitMatrixTimesMatrix(insn, state); - case spv::OpOuterProduct: - return EmitOuterProduct(insn, state); + case spv::OpOuterProduct: + return EmitOuterProduct(insn, state); - case spv::OpTranspose: - return EmitTranspose(insn, state); + case spv::OpTranspose: + return EmitTranspose(insn, state); - case spv::OpNot: - case spv::OpBitFieldInsert: - case spv::OpBitFieldSExtract: - case spv::OpBitFieldUExtract: - case spv::OpBitReverse: - case spv::OpBitCount: - case spv::OpSNegate: - case spv::OpFNegate: - case spv::OpLogicalNot: - case spv::OpConvertFToU: - case spv::OpConvertFToS: - case spv::OpConvertSToF: - case spv::OpConvertUToF: - case spv::OpBitcast: - case spv::OpIsInf: - case spv::OpIsNan: - case spv::OpDPdx: - 
case spv::OpDPdxCoarse: - case spv::OpDPdy: - case spv::OpDPdyCoarse: - case spv::OpFwidth: - case spv::OpFwidthCoarse: - case spv::OpDPdxFine: - case spv::OpDPdyFine: - case spv::OpFwidthFine: - case spv::OpQuantizeToF16: - return EmitUnaryOp(insn, state); + case spv::OpNot: + case spv::OpBitFieldInsert: + case spv::OpBitFieldSExtract: + case spv::OpBitFieldUExtract: + case spv::OpBitReverse: + case spv::OpBitCount: + case spv::OpSNegate: + case spv::OpFNegate: + case spv::OpLogicalNot: + case spv::OpConvertFToU: + case spv::OpConvertFToS: + case spv::OpConvertSToF: + case spv::OpConvertUToF: + case spv::OpBitcast: + case spv::OpIsInf: + case spv::OpIsNan: + case spv::OpDPdx: + case spv::OpDPdxCoarse: + case spv::OpDPdy: + case spv::OpDPdyCoarse: + case spv::OpFwidth: + case spv::OpFwidthCoarse: + case spv::OpDPdxFine: + case spv::OpDPdyFine: + case spv::OpFwidthFine: + case spv::OpQuantizeToF16: + return EmitUnaryOp(insn, state); - case spv::OpIAdd: - case spv::OpISub: - case spv::OpIMul: - case spv::OpSDiv: - case spv::OpUDiv: - case spv::OpFAdd: - case spv::OpFSub: - case spv::OpFMul: - case spv::OpFDiv: - case spv::OpFMod: - case spv::OpFRem: - case spv::OpFOrdEqual: - case spv::OpFUnordEqual: - case spv::OpFOrdNotEqual: - case spv::OpFUnordNotEqual: - case spv::OpFOrdLessThan: - case spv::OpFUnordLessThan: - case spv::OpFOrdGreaterThan: - case spv::OpFUnordGreaterThan: - case spv::OpFOrdLessThanEqual: - case spv::OpFUnordLessThanEqual: - case spv::OpFOrdGreaterThanEqual: - case spv::OpFUnordGreaterThanEqual: - case spv::OpSMod: - case spv::OpSRem: - case spv::OpUMod: - case spv::OpIEqual: - case spv::OpINotEqual: - case spv::OpUGreaterThan: - case spv::OpSGreaterThan: - case spv::OpUGreaterThanEqual: - case spv::OpSGreaterThanEqual: - case spv::OpULessThan: - case spv::OpSLessThan: - case spv::OpULessThanEqual: - case spv::OpSLessThanEqual: - case spv::OpShiftRightLogical: - case spv::OpShiftRightArithmetic: - case spv::OpShiftLeftLogical: - case 
spv::OpBitwiseOr: - case spv::OpBitwiseXor: - case spv::OpBitwiseAnd: - case spv::OpLogicalOr: - case spv::OpLogicalAnd: - case spv::OpLogicalEqual: - case spv::OpLogicalNotEqual: - case spv::OpUMulExtended: - case spv::OpSMulExtended: - case spv::OpIAddCarry: - case spv::OpISubBorrow: - return EmitBinaryOp(insn, state); + case spv::OpIAdd: + case spv::OpISub: + case spv::OpIMul: + case spv::OpSDiv: + case spv::OpUDiv: + case spv::OpFAdd: + case spv::OpFSub: + case spv::OpFMul: + case spv::OpFDiv: + case spv::OpFMod: + case spv::OpFRem: + case spv::OpFOrdEqual: + case spv::OpFUnordEqual: + case spv::OpFOrdNotEqual: + case spv::OpFUnordNotEqual: + case spv::OpFOrdLessThan: + case spv::OpFUnordLessThan: + case spv::OpFOrdGreaterThan: + case spv::OpFUnordGreaterThan: + case spv::OpFOrdLessThanEqual: + case spv::OpFUnordLessThanEqual: + case spv::OpFOrdGreaterThanEqual: + case spv::OpFUnordGreaterThanEqual: + case spv::OpSMod: + case spv::OpSRem: + case spv::OpUMod: + case spv::OpIEqual: + case spv::OpINotEqual: + case spv::OpUGreaterThan: + case spv::OpSGreaterThan: + case spv::OpUGreaterThanEqual: + case spv::OpSGreaterThanEqual: + case spv::OpULessThan: + case spv::OpSLessThan: + case spv::OpULessThanEqual: + case spv::OpSLessThanEqual: + case spv::OpShiftRightLogical: + case spv::OpShiftRightArithmetic: + case spv::OpShiftLeftLogical: + case spv::OpBitwiseOr: + case spv::OpBitwiseXor: + case spv::OpBitwiseAnd: + case spv::OpLogicalOr: + case spv::OpLogicalAnd: + case spv::OpLogicalEqual: + case spv::OpLogicalNotEqual: + case spv::OpUMulExtended: + case spv::OpSMulExtended: + case spv::OpIAddCarry: + case spv::OpISubBorrow: + return EmitBinaryOp(insn, state); - case spv::OpDot: - return EmitDot(insn, state); + case spv::OpDot: + return EmitDot(insn, state); - case spv::OpSelect: - return EmitSelect(insn, state); + case spv::OpSelect: + return EmitSelect(insn, state); - case spv::OpExtInst: - return EmitExtendedInstruction(insn, state); + case spv::OpExtInst: + 
return EmitExtendedInstruction(insn, state); - case spv::OpAny: - return EmitAny(insn, state); + case spv::OpAny: + return EmitAny(insn, state); - case spv::OpAll: - return EmitAll(insn, state); + case spv::OpAll: + return EmitAll(insn, state); - case spv::OpBranch: - return EmitBranch(insn, state); + case spv::OpBranch: + return EmitBranch(insn, state); - case spv::OpPhi: - return EmitPhi(insn, state); + case spv::OpPhi: + return EmitPhi(insn, state); - case spv::OpSelectionMerge: - case spv::OpLoopMerge: - return EmitResult::Continue; + case spv::OpSelectionMerge: + case spv::OpLoopMerge: + return EmitResult::Continue; - case spv::OpBranchConditional: - return EmitBranchConditional(insn, state); + case spv::OpBranchConditional: + return EmitBranchConditional(insn, state); - case spv::OpSwitch: - return EmitSwitch(insn, state); + case spv::OpSwitch: + return EmitSwitch(insn, state); - case spv::OpUnreachable: - return EmitUnreachable(insn, state); + case spv::OpUnreachable: + return EmitUnreachable(insn, state); - case spv::OpReturn: - return EmitReturn(insn, state); + case spv::OpReturn: + return EmitReturn(insn, state); - case spv::OpFunctionCall: - return EmitFunctionCall(insn, state); + case spv::OpFunctionCall: + return EmitFunctionCall(insn, state); - case spv::OpKill: - return EmitKill(insn, state); + case spv::OpKill: + return EmitKill(insn, state); - case spv::OpImageSampleImplicitLod: - return EmitImageSampleImplicitLod(None, insn, state); + case spv::OpImageSampleImplicitLod: + return EmitImageSampleImplicitLod(None, insn, state); - case spv::OpImageSampleExplicitLod: - return EmitImageSampleExplicitLod(None, insn, state); + case spv::OpImageSampleExplicitLod: + return EmitImageSampleExplicitLod(None, insn, state); - case spv::OpImageSampleDrefImplicitLod: - return EmitImageSampleImplicitLod(Dref, insn, state); + case spv::OpImageSampleDrefImplicitLod: + return EmitImageSampleImplicitLod(Dref, insn, state); - case spv::OpImageSampleDrefExplicitLod: - 
return EmitImageSampleExplicitLod(Dref, insn, state); + case spv::OpImageSampleDrefExplicitLod: + return EmitImageSampleExplicitLod(Dref, insn, state); - case spv::OpImageSampleProjImplicitLod: - return EmitImageSampleImplicitLod(Proj, insn, state); + case spv::OpImageSampleProjImplicitLod: + return EmitImageSampleImplicitLod(Proj, insn, state); - case spv::OpImageSampleProjExplicitLod: - return EmitImageSampleExplicitLod(Proj, insn, state); + case spv::OpImageSampleProjExplicitLod: + return EmitImageSampleExplicitLod(Proj, insn, state); - case spv::OpImageSampleProjDrefImplicitLod: - return EmitImageSampleImplicitLod(ProjDref, insn, state); + case spv::OpImageSampleProjDrefImplicitLod: + return EmitImageSampleImplicitLod(ProjDref, insn, state); - case spv::OpImageSampleProjDrefExplicitLod: - return EmitImageSampleExplicitLod(ProjDref, insn, state); + case spv::OpImageSampleProjDrefExplicitLod: + return EmitImageSampleExplicitLod(ProjDref, insn, state); - case spv::OpImageGather: - return EmitImageGather(None, insn, state); + case spv::OpImageGather: + return EmitImageGather(None, insn, state); - case spv::OpImageDrefGather: - return EmitImageGather(Dref, insn, state); + case spv::OpImageDrefGather: + return EmitImageGather(Dref, insn, state); - case spv::OpImageFetch: - return EmitImageFetch(insn, state); + case spv::OpImageFetch: + return EmitImageFetch(insn, state); - case spv::OpImageQuerySizeLod: - return EmitImageQuerySizeLod(insn, state); + case spv::OpImageQuerySizeLod: + return EmitImageQuerySizeLod(insn, state); - case spv::OpImageQuerySize: - return EmitImageQuerySize(insn, state); + case spv::OpImageQuerySize: + return EmitImageQuerySize(insn, state); - case spv::OpImageQueryLod: - return EmitImageQueryLod(insn, state); + case spv::OpImageQueryLod: + return EmitImageQueryLod(insn, state); - case spv::OpImageQueryLevels: - return EmitImageQueryLevels(insn, state); + case spv::OpImageQueryLevels: + return EmitImageQueryLevels(insn, state); - case 
spv::OpImageQuerySamples: - return EmitImageQuerySamples(insn, state); + case spv::OpImageQuerySamples: + return EmitImageQuerySamples(insn, state); - case spv::OpImageRead: - return EmitImageRead(insn, state); + case spv::OpImageRead: + return EmitImageRead(insn, state); - case spv::OpImageWrite: - return EmitImageWrite(insn, state); + case spv::OpImageWrite: + return EmitImageWrite(insn, state); - case spv::OpImageTexelPointer: - return EmitImageTexelPointer(insn, state); + case spv::OpImageTexelPointer: + return EmitImageTexelPointer(insn, state); - case spv::OpSampledImage: - case spv::OpImage: - return EmitSampledImageCombineOrSplit(insn, state); + case spv::OpSampledImage: + case spv::OpImage: + return EmitSampledImageCombineOrSplit(insn, state); - case spv::OpCopyObject: - return EmitCopyObject(insn, state); + case spv::OpCopyObject: + return EmitCopyObject(insn, state); - case spv::OpCopyMemory: - return EmitCopyMemory(insn, state); + case spv::OpCopyMemory: + return EmitCopyMemory(insn, state); - case spv::OpControlBarrier: - return EmitControlBarrier(insn, state); + case spv::OpControlBarrier: + return EmitControlBarrier(insn, state); - case spv::OpMemoryBarrier: - return EmitMemoryBarrier(insn, state); + case spv::OpMemoryBarrier: + return EmitMemoryBarrier(insn, state); - case spv::OpGroupNonUniformElect: - case spv::OpGroupNonUniformAll: - case spv::OpGroupNonUniformAny: - case spv::OpGroupNonUniformAllEqual: - case spv::OpGroupNonUniformBroadcast: - case spv::OpGroupNonUniformBroadcastFirst: - case spv::OpGroupNonUniformBallot: - case spv::OpGroupNonUniformInverseBallot: - case spv::OpGroupNonUniformBallotBitExtract: - case spv::OpGroupNonUniformBallotBitCount: - case spv::OpGroupNonUniformBallotFindLSB: - case spv::OpGroupNonUniformBallotFindMSB: - case spv::OpGroupNonUniformShuffle: - case spv::OpGroupNonUniformShuffleXor: - case spv::OpGroupNonUniformShuffleUp: - case spv::OpGroupNonUniformShuffleDown: - case spv::OpGroupNonUniformIAdd: - case 
spv::OpGroupNonUniformFAdd: - case spv::OpGroupNonUniformIMul: - case spv::OpGroupNonUniformFMul: - case spv::OpGroupNonUniformSMin: - case spv::OpGroupNonUniformUMin: - case spv::OpGroupNonUniformFMin: - case spv::OpGroupNonUniformSMax: - case spv::OpGroupNonUniformUMax: - case spv::OpGroupNonUniformFMax: - case spv::OpGroupNonUniformBitwiseAnd: - case spv::OpGroupNonUniformBitwiseOr: - case spv::OpGroupNonUniformBitwiseXor: - case spv::OpGroupNonUniformLogicalAnd: - case spv::OpGroupNonUniformLogicalOr: - case spv::OpGroupNonUniformLogicalXor: - return EmitGroupNonUniform(insn, state); + case spv::OpGroupNonUniformElect: + case spv::OpGroupNonUniformAll: + case spv::OpGroupNonUniformAny: + case spv::OpGroupNonUniformAllEqual: + case spv::OpGroupNonUniformBroadcast: + case spv::OpGroupNonUniformBroadcastFirst: + case spv::OpGroupNonUniformBallot: + case spv::OpGroupNonUniformInverseBallot: + case spv::OpGroupNonUniformBallotBitExtract: + case spv::OpGroupNonUniformBallotBitCount: + case spv::OpGroupNonUniformBallotFindLSB: + case spv::OpGroupNonUniformBallotFindMSB: + case spv::OpGroupNonUniformShuffle: + case spv::OpGroupNonUniformShuffleXor: + case spv::OpGroupNonUniformShuffleUp: + case spv::OpGroupNonUniformShuffleDown: + case spv::OpGroupNonUniformIAdd: + case spv::OpGroupNonUniformFAdd: + case spv::OpGroupNonUniformIMul: + case spv::OpGroupNonUniformFMul: + case spv::OpGroupNonUniformSMin: + case spv::OpGroupNonUniformUMin: + case spv::OpGroupNonUniformFMin: + case spv::OpGroupNonUniformSMax: + case spv::OpGroupNonUniformUMax: + case spv::OpGroupNonUniformFMax: + case spv::OpGroupNonUniformBitwiseAnd: + case spv::OpGroupNonUniformBitwiseOr: + case spv::OpGroupNonUniformBitwiseXor: + case spv::OpGroupNonUniformLogicalAnd: + case spv::OpGroupNonUniformLogicalOr: + case spv::OpGroupNonUniformLogicalXor: + return EmitGroupNonUniform(insn, state); - case spv::OpArrayLength: - return EmitArrayLength(insn, state); + case spv::OpArrayLength: + return 
EmitArrayLength(insn, state); - default: - UNREACHABLE("%s", OpcodeName(opcode).c_str()); - break; + default: + UNREACHABLE("%s", OpcodeName(opcode).c_str()); + break; } return EmitResult::Continue; @@ -1948,8 +1953,8 @@ for(auto i = 0u; i < insn.wordCount() - 3; i++) { Object::ID srcObjectId = insn.word(3u + i); - auto & srcObject = getObject(srcObjectId); - auto & srcObjectTy = getType(srcObject.type); + auto &srcObject = getObject(srcObjectId); + auto &srcObjectTy = getType(srcObject.type); GenericValue srcObjectAccess(this, state, srcObjectId); for(auto j = 0u; j < srcObjectTy.sizeInComponents; j++) @@ -2092,7 +2097,7 @@ for(auto i = 0u; i < type.sizeInComponents; i++) { auto sel = cond.Int(condIsScalar ? 0 : i); - dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse() + dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse() } return EmitResult::Continue; @@ -2160,41 +2165,41 @@ UInt v; switch(insn.opcode()) { - case spv::OpAtomicIAdd: - case spv::OpAtomicIIncrement: - v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicISub: - case spv::OpAtomicIDecrement: - v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicAnd: - v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicOr: - v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicXor: - v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicSMin: - v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder)); - break; - case spv::OpAtomicSMax: - v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder)); - break; - case spv::OpAtomicUMin: - v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicUMax: - v = 
MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - case spv::OpAtomicExchange: - v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); - break; - default: - UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); - break; + case spv::OpAtomicIAdd: + case spv::OpAtomicIIncrement: + v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicISub: + case spv::OpAtomicIDecrement: + v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicAnd: + v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicOr: + v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicXor: + v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicSMin: + v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder)); + break; + case spv::OpAtomicSMax: + v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder)); + break; + case spv::OpAtomicUMin: + v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicUMax: + v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + case spv::OpAtomicExchange: + v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder); + break; + default: + UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); + break; } x = Insert(x, v, j); } @@ -2298,25 +2303,25 @@ { switch(insn.opcode()) { - case spv::OpVariable: - { - Object::ID resultId = insn.word(2); - auto &object = getObject(resultId); - auto &objectTy = getType(object.type); - if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput) + case spv::OpVariable: { - auto &dst = routine->getVariable(resultId); - int offset = 0; - VisitInterface(resultId, - 
[&](Decorations const &d, AttribType type) { - auto scalarSlot = d.Location << 2 | d.Component; - routine->outputs[scalarSlot] = dst[offset++]; - }); + Object::ID resultId = insn.word(2); + auto &object = getObject(resultId); + auto &objectTy = getType(object.type); + if(object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput) + { + auto &dst = routine->getVariable(resultId); + int offset = 0; + VisitInterface(resultId, + [&](Decorations const &d, AttribType type) { + auto scalarSlot = d.Location << 2 | d.Component; + routine->outputs[scalarSlot] = dst[offset++]; + }); + } + break; } - break; - } - default: - break; + default: + break; } } @@ -2331,47 +2336,46 @@ { switch(model) { - case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT; - // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT; - // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan. 
- // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV; - // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV; - // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV; - // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV; - // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV; - // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV; - // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV; - // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV; - default: - UNSUPPORTED("ExecutionModel: %d", int(model)); - return VkShaderStageFlagBits(0); + case spv::ExecutionModelVertex: return VK_SHADER_STAGE_VERTEX_BIT; + // case spv::ExecutionModelTessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + // case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + // case spv::ExecutionModelGeometry: return VK_SHADER_STAGE_GEOMETRY_BIT; + case spv::ExecutionModelFragment: return VK_SHADER_STAGE_FRAGMENT_BIT; + case spv::ExecutionModelGLCompute: return VK_SHADER_STAGE_COMPUTE_BIT; + // case spv::ExecutionModelKernel: return VkShaderStageFlagBits(0); // Not supported by vulkan. 
+ // case spv::ExecutionModelTaskNV: return VK_SHADER_STAGE_TASK_BIT_NV; + // case spv::ExecutionModelMeshNV: return VK_SHADER_STAGE_MESH_BIT_NV; + // case spv::ExecutionModelRayGenerationNV: return VK_SHADER_STAGE_RAYGEN_BIT_NV; + // case spv::ExecutionModelIntersectionNV: return VK_SHADER_STAGE_INTERSECTION_BIT_NV; + // case spv::ExecutionModelAnyHitNV: return VK_SHADER_STAGE_ANY_HIT_BIT_NV; + // case spv::ExecutionModelClosestHitNV: return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV; + // case spv::ExecutionModelMissNV: return VK_SHADER_STAGE_MISS_BIT_NV; + // case spv::ExecutionModelCallableNV: return VK_SHADER_STAGE_CALLABLE_BIT_NV; + default: + UNSUPPORTED("ExecutionModel: %d", int(model)); + return VkShaderStageFlagBits(0); } } -SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId) : - obj(shader->getObject(objId)), - intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr), - type(obj.type) {} +SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId) + : obj(shader->getObject(objId)) + , intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? 
&state->getIntermediate(objId) : nullptr) + , type(obj.type) +{} -SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) : - pipelineLayout(pipelineLayout) +SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) + : pipelineLayout(pipelineLayout) { } void SpirvRoutine::setImmutableInputBuiltins(SpirvShader const *shader) { - setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3)); }); - setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 4); value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8)); value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); @@ -2379,8 +2383,7 @@ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); }); - setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 4); value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8)); value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); @@ -2388,8 +2391,7 @@ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); }); - setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) 
- { + setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 4); value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0)); value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); @@ -2397,8 +2399,7 @@ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); }); - setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 4); value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15)); value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); @@ -2406,8 +2407,7 @@ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); }); - setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 4); value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7)); value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); @@ -2415,8 +2415,7 @@ value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0)); }); - setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); // Only a single physical device is supported. value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp index e3d3af7..ecc3f5b 100644 --- a/src/Pipeline/SpirvShader.hpp +++ b/src/Pipeline/SpirvShader.hpp
@@ -15,15 +15,15 @@ #ifndef sw_SpirvShader_hpp #define sw_SpirvShader_hpp -#include "ShaderCore.hpp" #include "SamplerCore.hpp" +#include "ShaderCore.hpp" #include "SpirvID.hpp" -#include "System/Types.hpp" -#include "Vulkan/VkDebug.hpp" -#include "Vulkan/VkConfig.h" -#include "Vulkan/VkDescriptorSet.hpp" #include "Device/Config.hpp" #include "Device/Sampler.hpp" +#include "System/Types.hpp" +#include "Vulkan/VkConfig.h" +#include "Vulkan/VkDebug.hpp" +#include "Vulkan/VkDescriptorSet.hpp" #include <spirv/unified1/spirv.hpp> @@ -31,16 +31,16 @@ #include <atomic> #include <cstdint> #include <cstring> +#include <deque> #include <functional> #include <memory> -#include <deque> #include <string> #include <type_traits> #include <unordered_map> #include <unordered_set> #include <vector> -#undef Yield // b/127920555 +#undef Yield // b/127920555 namespace vk { @@ -65,8 +65,11 @@ class Intermediate { public: - Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size) { - memset(scalar, 0, sizeof(rr::Value*) * size); + Intermediate(uint32_t size) + : scalar(new rr::Value *[size]) + , size(size) + { + memset(scalar, 0, sizeof(rr::Value *) * size); } ~Intermediate() @@ -75,12 +78,12 @@ } void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); } - void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); } - void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); } + void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); } + void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); } void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); } - void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); } - void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); } + void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); } + void move(uint32_t i, const 
RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); } // Value retrieval functions. RValue<SIMD::Float> Float(uint32_t i) const @@ -107,8 +110,8 @@ // No copy/move construction or assignment Intermediate(Intermediate const &) = delete; Intermediate(Intermediate &&) = delete; - Intermediate & operator=(Intermediate const &) = delete; - Intermediate & operator=(Intermediate &&) = delete; + Intermediate &operator=(Intermediate const &) = delete; + Intermediate &operator=(Intermediate &&) = delete; private: void emplace(uint32_t i, rr::Value *value) @@ -128,7 +131,7 @@ using InsnStore = std::vector<uint32_t>; InsnStore insns; - using ImageSampler = void(void* texture, void *sampler, void* uvsIn, void* texelOut, void* constants); + using ImageSampler = void(void *texture, void *sampler, void *uvsIn, void *texelOut, void *constants); enum class YieldResult { @@ -157,15 +160,15 @@ return iter[n]; } - uint32_t const * wordPointer(uint32_t n) const + uint32_t const *wordPointer(uint32_t n) const { ASSERT(n < wordCount()); return &iter[n]; } - const char* string(uint32_t n) const + const char *string(uint32_t n) const { - return reinterpret_cast<const char*>(wordPointer(n)); + return reinterpret_cast<const char *>(wordPointer(n)); } bool operator==(InsnIterator const &other) const @@ -191,7 +194,7 @@ InsnIterator const operator++(int) { - InsnIterator ret{*this}; + InsnIterator ret{ *this }; iter += wordCount(); return ret; } @@ -200,7 +203,8 @@ InsnIterator() = default; - explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter} + explicit InsnIterator(InsnStore::const_iterator iter) + : iter{ iter } { } }; @@ -208,12 +212,12 @@ /* range-based-for interface */ InsnIterator begin() const { - return InsnIterator{insns.cbegin() + 5}; + return InsnIterator{ insns.cbegin() + 5 }; } InsnIterator end() const { - return InsnIterator{insns.cend()}; + return InsnIterator{ insns.cend() }; } class Type @@ -287,11 +291,11 @@ ID from; ID to; - bool operator == (const Edge& 
other) const { return from == other.from && to == other.to; } + bool operator==(const Edge &other) const { return from == other.from && to == other.to; } struct Hash { - std::size_t operator()(const Edge& edge) const noexcept + std::size_t operator()(const Edge &edge) const noexcept { return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value()); } @@ -299,7 +303,7 @@ }; Block() = default; - Block(const Block& other) = default; + Block(const Block &other) = default; explicit Block(InsnIterator begin, InsnIterator end); /* range-based-for interface */ @@ -308,22 +312,23 @@ enum Kind { - Simple, // OpBranch or other simple terminator. - StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional - UnstructuredBranchConditional, // OpBranchConditional - StructuredSwitch, // OpSelectionMerge + OpSwitch - UnstructuredSwitch, // OpSwitch - Loop, // OpLoopMerge + [OpBranchConditional | OpBranch] + Simple, // OpBranch or other simple terminator. + StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional + UnstructuredBranchConditional, // OpBranchConditional + StructuredSwitch, // OpSelectionMerge + OpSwitch + UnstructuredSwitch, // OpSwitch + Loop, // OpLoopMerge + [OpBranchConditional | OpBranch] }; Kind kind = Simple; - InsnIterator mergeInstruction; // Structured control flow merge instruction. - InsnIterator branchInstruction; // Branch instruction. - ID mergeBlock; // Structured flow merge block. - ID continueTarget; // Loop continue block. - Set ins; // Blocks that branch into this block. - Set outs; // Blocks that this block branches to. + InsnIterator mergeInstruction; // Structured control flow merge instruction. + InsnIterator branchInstruction; // Branch instruction. + ID mergeBlock; // Structured flow merge block. + ID continueTarget; // Loop continue block. + Set ins; // Blocks that branch into this block. + Set outs; // Blocks that this block branches to. 
bool isLoopMerge = false; + private: InsnIterator begin_; InsnIterator end_; @@ -336,7 +341,7 @@ // Walks all reachable the blocks starting from id adding them to // reachable. - void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const; + void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const; // AssignBlockFields() performs the following for all reachable blocks: // * Assigns Block::ins with the identifiers of all blocks that contain @@ -362,13 +367,14 @@ return it->second; } - Block::ID entry; // function entry point block. - HandleMap<Block> blocks; // blocks belonging to this function. - Type::ID type; // type of the function. - Type::ID result; // return type. + Block::ID entry; // function entry point block. + HandleMap<Block> blocks; // blocks belonging to this function. + Type::ID type; // type of the function. + Type::ID result; // return type. }; - struct TypeOrObject {}; // Dummy struct to represent a Type or Object. + struct TypeOrObject + {}; // Dummy struct to represent a Type or Object. // TypeOrObjectID is an identifier that represents a Type or an Object, // and supports implicit casting to and from Type::ID or Object::ID. 
@@ -377,9 +383,15 @@ public: using Hash = std::hash<SpirvID<TypeOrObject>>; - inline TypeOrObjectID(uint32_t id) : SpirvID(id) {} - inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {} - inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {} + inline TypeOrObjectID(uint32_t id) + : SpirvID(id) + {} + inline TypeOrObjectID(Type::ID id) + : SpirvID(id.value()) + {} + inline TypeOrObjectID(Object::ID id) + : SpirvID(id.value()) + {} inline operator Type::ID() const { return Type::ID(value()); } inline operator Object::ID() const { return Object::ID(value()); } }; @@ -399,14 +411,16 @@ struct ImageInstruction { ImageInstruction(Variant variant, SamplerMethod samplerMethod) - : parameters(0) + : parameters(0) { this->variant = variant; this->samplerMethod = samplerMethod; } // Unmarshal from raw 32-bit data - ImageInstruction(uint32_t parameters) : parameters(parameters) {} + ImageInstruction(uint32_t parameters) + : parameters(parameters) + {} SamplerFunction getSamplerFunction() const { @@ -432,12 +446,12 @@ uint32_t gatherComponent : 2; // Parameters are passed to the sampling routine in this order: - uint32_t coordinates : 3; // 1-4 (does not contain projection component) - // uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref - // uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch - uint32_t grad : 2; // 0-3 components (for each of dx / dy) - uint32_t offset : 2; // 0-3 components - uint32_t sample : 1; // 0-1 scalar integer + uint32_t coordinates : 3; // 1-4 (does not contain projection component) + // uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref + // uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch + uint32_t grad : 2; // 0-3 components (for each of dx / dy) + uint32_t offset : 2; // 0-3 components + uint32_t sample : 1; // 0-1 scalar integer }; uint32_t parameters; @@ -450,7 +464,7 @@ // shader entry point represented by this object. 
uint64_t getSerialID() const { - return ((uint64_t)entryPoint.value() << 32) | codeSerialID; + return ((uint64_t)entryPoint.value() << 32) | codeSerialID; } SpirvShader(uint32_t codeSerialID, @@ -577,7 +591,7 @@ bool HasOffset : 1; bool HasArrayStride : 1; bool HasMatrixStride : 1; - bool HasRowMajor : 1; // whether RowMajor bit is valid. + bool HasRowMajor : 1; // whether RowMajor bit is valid. bool Flat : 1; bool Centroid : 1; @@ -585,21 +599,31 @@ bool Block : 1; bool BufferBlock : 1; bool RelaxedPrecision : 1; - bool RowMajor : 1; // RowMajor if true; ColMajor if false - bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. + bool RowMajor : 1; // RowMajor if true; ColMajor if false + bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. Decorations() - : Location{-1}, Component{0}, - BuiltIn{static_cast<spv::BuiltIn>(-1)}, - Offset{-1}, ArrayStride{-1}, MatrixStride{-1}, - HasLocation{false}, HasComponent{false}, - HasBuiltIn{false}, HasOffset{false}, - HasArrayStride{false}, HasMatrixStride{false}, - HasRowMajor{false}, - Flat{false}, Centroid{false}, NoPerspective{false}, - Block{false}, BufferBlock{false}, - RelaxedPrecision{false}, RowMajor{false}, - InsideMatrix{false} + : Location{ -1 } + , Component{ 0 } + , BuiltIn{ static_cast<spv::BuiltIn>(-1) } + , Offset{ -1 } + , ArrayStride{ -1 } + , MatrixStride{ -1 } + , HasLocation{ false } + , HasComponent{ false } + , HasBuiltIn{ false } + , HasOffset{ false } + , HasArrayStride{ false } + , HasMatrixStride{ false } + , HasRowMajor{ false } + , Flat{ false } + , Centroid{ false } + , NoPerspective{ false } + , Block{ false } + , BufferBlock{ false } + , RelaxedPrecision{ false } + , RowMajor{ false } + , InsideMatrix{ false } { } @@ -642,7 +666,8 @@ }; InterfaceComponent() - : Type{ATTRIBTYPE_UNUSED}, DecorationBits{0} + : Type{ ATTRIBTYPE_UNUSED } + , DecorationBits{ 0 } { } }; @@ -673,9 +698,10 @@ } // returns the total allocated size in bytes. 
inline uint32_t size() const { return totalSize; } + private: - uint32_t totalSize = 0; // in bytes - std::unordered_map<Object::ID, uint32_t> offsets; // in bytes + uint32_t totalSize = 0; // in bytes + std::unordered_map<Object::ID, uint32_t> offsets; // in bytes }; std::vector<InterfaceComponent> inputs; @@ -700,7 +726,7 @@ Function::ID entryPoint; const bool robustBufferAccess = true; - spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing. + spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing. // DeclareType creates a Type for the given OpTypeX instruction, storing // it into the types map. It is called from the analysis pass (constructor). @@ -768,30 +794,30 @@ using InterfaceVisitor = std::function<void(Decorations const, AttribType)>; - void VisitInterface(Object::ID id, const InterfaceVisitor& v) const; + void VisitInterface(Object::ID id, const InterfaceVisitor &v) const; - int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor& v) const; + int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const; // MemoryElement describes a scalar element within a structure, and is // used by the callback function of VisitMemoryObject(). 
struct MemoryElement { - uint32_t index; // index of the scalar element - uint32_t offset; // offset (in bytes) from the base of the object - const Type& type; // element type + uint32_t index; // index of the scalar element + uint32_t offset; // offset (in bytes) from the base of the object + const Type &type; // element type }; - using MemoryVisitor = std::function<void(const MemoryElement&)>; + using MemoryVisitor = std::function<void(const MemoryElement &)>; // VisitMemoryObject() walks a type tree in an explicitly laid out // storage class, calling the MemoryVisitor for each scalar element // within the - void VisitMemoryObject(Object::ID id, const MemoryVisitor& v) const; + void VisitMemoryObject(Object::ID id, const MemoryVisitor &v) const; // VisitMemoryObjectInner() is internally called by VisitMemoryObject() - void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor& v) const; + void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &v) const; - Object& CreateConstant(InsnIterator it); + Object &CreateConstant(InsnIterator it); void ProcessInterfaceVariable(Object &object); @@ -800,19 +826,19 @@ { public: EmitState(SpirvRoutine *routine, - Function::ID function, - RValue<SIMD::Int> activeLaneMask, - RValue<SIMD::Int> storesAndAtomicsMask, - const vk::DescriptorSet::Bindings &descriptorSets, - bool robustBufferAccess, - spv::ExecutionModel executionModel) - : routine(routine), - function(function), - activeLaneMaskValue(activeLaneMask.value), - storesAndAtomicsMaskValue(storesAndAtomicsMask.value), - descriptorSets(descriptorSets), - robustBufferAccess(robustBufferAccess), - executionModel(executionModel) + Function::ID function, + RValue<SIMD::Int> activeLaneMask, + RValue<SIMD::Int> storesAndAtomicsMask, + const vk::DescriptorSet::Bindings &descriptorSets, + bool robustBufferAccess, + spv::ExecutionModel executionModel) + : routine(routine) + , 
function(function) + , activeLaneMaskValue(activeLaneMask.value) + , storesAndAtomicsMaskValue(storesAndAtomicsMask.value) + , descriptorSets(descriptorSets) + , robustBufferAccess(robustBufferAccess) + , executionModel(executionModel) { ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting. } @@ -845,12 +871,12 @@ // they will be ORed together. void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask); - SpirvRoutine *routine = nullptr; // The current routine being built. - Function::ID function; // The current function being built. - Block::ID block; // The current block being built. - rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask. - rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask. - Block::Set visited; // Blocks already built. + SpirvRoutine *routine = nullptr; // The current routine being built. + Function::ID function; // The current function being built. + Block::ID block; // The current block being built. + rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask. + rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask. + Block::Set visited; // Blocks already built. 
std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks; std::deque<Block::ID> *pending; @@ -858,16 +884,16 @@ OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const; - Intermediate& createIntermediate(Object::ID id, uint32_t size) + Intermediate &createIntermediate(Object::ID id, uint32_t size) { auto it = intermediates.emplace(std::piecewise_construct, - std::forward_as_tuple(id), - std::forward_as_tuple(size)); + std::forward_as_tuple(id), + std::forward_as_tuple(size)); ASSERT_MSG(it.second, "Intermediate %d created twice", id.value()); return it.first->second; } - Intermediate const& getIntermediate(Object::ID id) const + Intermediate const &getIntermediate(Object::ID id) const { auto it = intermediates.find(id); ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value()); @@ -880,7 +906,7 @@ ASSERT_MSG(added, "Pointer %d created twice", id.value()); } - SIMD::Pointer const& getPointer(Object::ID id) const + SIMD::Pointer const &getPointer(Object::ID id) const { auto it = pointers.find(id); ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value()); @@ -898,8 +924,8 @@ // EmitResult is an enumerator of result values from the Emit functions. enum class EmitResult { - Continue, // No termination instructions. - Terminator, // Reached a termination instruction. + Continue, // No termination instructions. + Terminator, // Reached a termination instruction. }; // Generic wrapper over either per-lane intermediate value, or a constant. 
@@ -1058,7 +1084,7 @@ EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const; void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const; - SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const; + SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const; uint32_t GetConstScalarInt(Object::ID id) const; void EvalSpecConstantOp(InsnIterator insn); void EvalSpecConstantUnaryOp(InsnIterator insn); @@ -1071,7 +1097,7 @@ // StorePhi updates the phi's alloca storage value using the incoming // values from blocks that are both in the OpPhi instruction and in // filter. - void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const; + void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const; // Emits a rr::Fence for the given MemorySemanticsMask. void Fence(spv::MemorySemanticsMask semantics) const; @@ -1085,7 +1111,7 @@ static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics); // Helper as we often need to take dot products as part of doing other things. 
- SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const; + SIMD::Float Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const; // Splits x into a floating-point significand in the range [0.5, 1.0) // and an integral exponent of two, such that: @@ -1121,21 +1147,21 @@ Pointer<Byte> function; }; - vk::PipelineLayout const * const pipelineLayout; + vk::PipelineLayout const *const pipelineLayout; std::unordered_map<SpirvShader::Object::ID, Variable> variables; std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache; - Variable inputs = Variable{MAX_INTERFACE_COMPONENTS}; - Variable outputs = Variable{MAX_INTERFACE_COMPONENTS}; + Variable inputs = Variable{ MAX_INTERFACE_COMPONENTS }; + Variable outputs = Variable{ MAX_INTERFACE_COMPONENTS }; Pointer<Byte> workgroupMemory; Pointer<Pointer<Byte>> descriptorSets; Pointer<Int> descriptorDynamicOffsets; Pointer<Byte> pushConstants; Pointer<Byte> constants; - Int killMask = Int{0}; + Int killMask = Int{ 0 }; SIMD::Int windowSpacePosition[2]; - Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex + Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex void createVariable(SpirvShader::Object::ID id, uint32_t size) { @@ -1143,7 +1169,7 @@ ASSERT_MSG(added, "Variable %d created twice", id.value()); } - Variable& getVariable(SpirvShader::Object::ID id) + Variable &getVariable(SpirvShader::Object::ID id) { auto it = variables.find(id); ASSERT_MSG(it != variables.end(), "Unknown variables %d", id.value()); @@ -1158,13 +1184,13 @@ // uses the input builtin, otherwise the call is a no-op. 
// F is a function with the signature: // void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - template <typename F> - inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f) + template<typename F> + inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F &&f) { auto it = shader->inputBuiltins.find(id); if(it != shader->inputBuiltins.end()) { - const auto& builtin = it->second; + const auto &builtin = it->second; f(builtin, getVariable(builtin.Id)); } } @@ -1176,7 +1202,6 @@ friend class SpirvShader; std::unordered_map<SpirvShader::Object::ID, Variable> phis; - }; } // namespace sw
diff --git a/src/Pipeline/SpirvShaderArithmetic.cpp b/src/Pipeline/SpirvShaderArithmetic.cpp index 218df0e..f039124 100644 --- a/src/Pipeline/SpirvShaderArithmetic.cpp +++ b/src/Pipeline/SpirvShaderArithmetic.cpp
@@ -163,153 +163,152 @@ { switch(insn.opcode()) { - case spv::OpNot: - case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation - dst.move(i, ~src.UInt(i)); - break; - case spv::OpBitFieldInsert: - { - auto insert = GenericValue(this, state, insn.word(4)).UInt(i); - auto offset = GenericValue(this, state, insn.word(5)).UInt(0); - auto count = GenericValue(this, state, insn.word(6)).UInt(0); - auto one = SIMD::UInt(1); - auto v = src.UInt(i); - auto mask = Bitmask32(offset + count) ^ Bitmask32(offset); - dst.move(i, (v & ~mask) | ((insert << offset) & mask)); - break; - } - case spv::OpBitFieldSExtract: - case spv::OpBitFieldUExtract: - { - auto offset = GenericValue(this, state, insn.word(4)).UInt(0); - auto count = GenericValue(this, state, insn.word(5)).UInt(0); - auto one = SIMD::UInt(1); - auto v = src.UInt(i); - SIMD::UInt out = (v >> offset) & Bitmask32(count); - if(insn.opcode() == spv::OpBitFieldSExtract) + case spv::OpNot: + case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation + dst.move(i, ~src.UInt(i)); + break; + case spv::OpBitFieldInsert: { - auto sign = out & NthBit32(count - one); - auto sext = ~(sign - one); - out |= sext; + auto insert = GenericValue(this, state, insn.word(4)).UInt(i); + auto offset = GenericValue(this, state, insn.word(5)).UInt(0); + auto count = GenericValue(this, state, insn.word(6)).UInt(0); + auto one = SIMD::UInt(1); + auto v = src.UInt(i); + auto mask = Bitmask32(offset + count) ^ Bitmask32(offset); + dst.move(i, (v & ~mask) | ((insert << offset) & mask)); + break; } - dst.move(i, out); - break; - } - case spv::OpBitReverse: - { - // TODO: Add an intrinsic to reactor. Even if there isn't a - // single vector instruction, there may be target-dependent - // ways to make this faster. 
- // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - SIMD::UInt v = src.UInt(i); - v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1); - v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2); - v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4); - v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8); - v = (v >> 16) | (v << 16); - dst.move(i, v); - break; - } - case spv::OpBitCount: - dst.move(i, CountBits(src.UInt(i))); - break; - case spv::OpSNegate: - dst.move(i, -src.Int(i)); - break; - case spv::OpFNegate: - dst.move(i, -src.Float(i)); - break; - case spv::OpConvertFToU: - dst.move(i, SIMD::UInt(src.Float(i))); - break; - case spv::OpConvertFToS: - dst.move(i, SIMD::Int(src.Float(i))); - break; - case spv::OpConvertSToF: - dst.move(i, SIMD::Float(src.Int(i))); - break; - case spv::OpConvertUToF: - dst.move(i, SIMD::Float(src.UInt(i))); - break; - case spv::OpBitcast: - dst.move(i, src.Float(i)); - break; - case spv::OpIsInf: - dst.move(i, IsInf(src.Float(i))); - break; - case spv::OpIsNan: - dst.move(i, IsNan(src.Float(i))); - break; - case spv::OpDPdx: - case spv::OpDPdxCoarse: - // Derivative instructions: FS invocations are laid out like so: - // 0 1 - // 2 3 - static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width"); - dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))); - break; - case spv::OpDPdy: - case spv::OpDPdyCoarse: - dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))); - break; - case spv::OpFwidth: - case spv::OpFwidthCoarse: - dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)) - + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)))); - break; - case spv::OpDPdxFine: - { - auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0); - auto secondRow = Extract(src.Float(i), 3) - 
Extract(src.Float(i), 2); - SIMD::Float v = SIMD::Float(firstRow); - v = Insert(v, secondRow, 2); - v = Insert(v, secondRow, 3); - dst.move(i, v); - break; - } - case spv::OpDPdyFine: - { - auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0); - auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1); - SIMD::Float v = SIMD::Float(firstColumn); - v = Insert(v, secondColumn, 1); - v = Insert(v, secondColumn, 3); - dst.move(i, v); - break; - } - case spv::OpFwidthFine: - { - auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0); - auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2); - SIMD::Float dpdx = SIMD::Float(firstRow); - dpdx = Insert(dpdx, secondRow, 2); - dpdx = Insert(dpdx, secondRow, 3); - auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0); - auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1); - SIMD::Float dpdy = SIMD::Float(firstColumn); - dpdy = Insert(dpdy, secondColumn, 1); - dpdy = Insert(dpdy, secondColumn, 3); - dst.move(i, Abs(dpdx) + Abs(dpdy)); - break; - } - case spv::OpQuantizeToF16: - { - // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp - auto abs = Abs(src.Float(i)); - auto sign = src.Int(i) & SIMD::Int(0x80000000); - auto isZero = CmpLT(abs, SIMD::Float(0.000061035f)); - auto isInf = CmpGT(abs, SIMD::Float(65504.0f)); - auto isNaN = IsNan(abs); - auto isInfOrNan = isInf | isNaN; - SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000); - v &= ~isZero | SIMD::Int(0x80000000); - v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v); - v |= isNaN & SIMD::Int(0x400000); - dst.move(i, v); - break; - } - default: - UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); + case spv::OpBitFieldSExtract: + case spv::OpBitFieldUExtract: + { + auto offset = GenericValue(this, state, insn.word(4)).UInt(0); + auto count = GenericValue(this, state, insn.word(5)).UInt(0); + auto one = SIMD::UInt(1); + 
auto v = src.UInt(i); + SIMD::UInt out = (v >> offset) & Bitmask32(count); + if(insn.opcode() == spv::OpBitFieldSExtract) + { + auto sign = out & NthBit32(count - one); + auto sext = ~(sign - one); + out |= sext; + } + dst.move(i, out); + break; + } + case spv::OpBitReverse: + { + // TODO: Add an intrinsic to reactor. Even if there isn't a + // single vector instruction, there may be target-dependent + // ways to make this faster. + // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel + SIMD::UInt v = src.UInt(i); + v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1); + v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2); + v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4); + v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8); + v = (v >> 16) | (v << 16); + dst.move(i, v); + break; + } + case spv::OpBitCount: + dst.move(i, CountBits(src.UInt(i))); + break; + case spv::OpSNegate: + dst.move(i, -src.Int(i)); + break; + case spv::OpFNegate: + dst.move(i, -src.Float(i)); + break; + case spv::OpConvertFToU: + dst.move(i, SIMD::UInt(src.Float(i))); + break; + case spv::OpConvertFToS: + dst.move(i, SIMD::Int(src.Float(i))); + break; + case spv::OpConvertSToF: + dst.move(i, SIMD::Float(src.Int(i))); + break; + case spv::OpConvertUToF: + dst.move(i, SIMD::Float(src.UInt(i))); + break; + case spv::OpBitcast: + dst.move(i, src.Float(i)); + break; + case spv::OpIsInf: + dst.move(i, IsInf(src.Float(i))); + break; + case spv::OpIsNan: + dst.move(i, IsNan(src.Float(i))); + break; + case spv::OpDPdx: + case spv::OpDPdxCoarse: + // Derivative instructions: FS invocations are laid out like so: + // 0 1 + // 2 3 + static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width"); + dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))); + break; + case spv::OpDPdy: + case spv::OpDPdyCoarse: + 
dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))); + break; + case spv::OpFwidth: + case spv::OpFwidthCoarse: + dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)) + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)))); + break; + case spv::OpDPdxFine: + { + auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0); + auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2); + SIMD::Float v = SIMD::Float(firstRow); + v = Insert(v, secondRow, 2); + v = Insert(v, secondRow, 3); + dst.move(i, v); + break; + } + case spv::OpDPdyFine: + { + auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0); + auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1); + SIMD::Float v = SIMD::Float(firstColumn); + v = Insert(v, secondColumn, 1); + v = Insert(v, secondColumn, 3); + dst.move(i, v); + break; + } + case spv::OpFwidthFine: + { + auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0); + auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2); + SIMD::Float dpdx = SIMD::Float(firstRow); + dpdx = Insert(dpdx, secondRow, 2); + dpdx = Insert(dpdx, secondRow, 3); + auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0); + auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1); + SIMD::Float dpdy = SIMD::Float(firstColumn); + dpdy = Insert(dpdy, secondColumn, 1); + dpdy = Insert(dpdy, secondColumn, 3); + dst.move(i, Abs(dpdx) + Abs(dpdy)); + break; + } + case spv::OpQuantizeToF16: + { + // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp + auto abs = Abs(src.Float(i)); + auto sign = src.Int(i) & SIMD::Int(0x80000000); + auto isZero = CmpLT(abs, SIMD::Float(0.000061035f)); + auto isInf = CmpGT(abs, SIMD::Float(65504.0f)); + auto isNaN = IsNan(abs); + auto isInfOrNan = isInf | isNaN; + SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000); + v &= ~isZero | 
SIMD::Int(0x80000000); + v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v); + v |= isNaN & SIMD::Int(0x400000); + dst.move(i, v); + break; + } + default: + UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); } } @@ -328,191 +327,191 @@ { switch(insn.opcode()) { - case spv::OpIAdd: - dst.move(i, lhs.Int(i) + rhs.Int(i)); - break; - case spv::OpISub: - dst.move(i, lhs.Int(i) - rhs.Int(i)); - break; - case spv::OpIMul: - dst.move(i, lhs.Int(i) * rhs.Int(i)); - break; - case spv::OpSDiv: - { - SIMD::Int a = lhs.Int(i); - SIMD::Int b = rhs.Int(i); - b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero - a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow - dst.move(i, a / b); - break; - } - case spv::OpUDiv: - { - auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0))); - dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask)); - break; - } - case spv::OpSRem: - { - SIMD::Int a = lhs.Int(i); - SIMD::Int b = rhs.Int(i); - b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero - a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow - dst.move(i, a % b); - break; - } - case spv::OpSMod: - { - SIMD::Int a = lhs.Int(i); - SIMD::Int b = rhs.Int(i); - b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero - a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow - auto mod = a % b; - // If a and b have opposite signs, the remainder operation takes - // the sign from a but OpSMod is supposed to take the sign of b. - // Adding b will ensure that the result has the correct sign and - // that it is still congruent to a modulo b. 
- // - // See also http://mathforum.org/library/drmath/view/52343.html - auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0))); - auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff); - dst.move(i, As<SIMD::Float>(fixedMod)); - break; - } - case spv::OpUMod: - { - auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0))); - dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask)); - break; - } - case spv::OpIEqual: - case spv::OpLogicalEqual: - dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpINotEqual: - case spv::OpLogicalNotEqual: - dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpUGreaterThan: - dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i))); - break; - case spv::OpSGreaterThan: - dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpUGreaterThanEqual: - dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i))); - break; - case spv::OpSGreaterThanEqual: - dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpULessThan: - dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i))); - break; - case spv::OpSLessThan: - dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpULessThanEqual: - dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i))); - break; - case spv::OpSLessThanEqual: - dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpFAdd: - dst.move(i, lhs.Float(i) + rhs.Float(i)); - break; - case spv::OpFSub: - dst.move(i, lhs.Float(i) - rhs.Float(i)); - break; - case spv::OpFMul: - dst.move(i, lhs.Float(i) * rhs.Float(i)); - break; - case spv::OpFDiv: - dst.move(i, lhs.Float(i) / rhs.Float(i)); - break; - case spv::OpFMod: - // TODO(b/126873455): inaccurate for values greater than 2^24 - dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i))); - break; - case spv::OpFRem: - dst.move(i, lhs.Float(i) % rhs.Float(i)); - break; - case spv::OpFOrdEqual: - dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFUnordEqual: - dst.move(i, 
CmpUEQ(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFOrdNotEqual: - dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFUnordNotEqual: - dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFOrdLessThan: - dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFUnordLessThan: - dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFOrdGreaterThan: - dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFUnordGreaterThan: - dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFOrdLessThanEqual: - dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFUnordLessThanEqual: - dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFOrdGreaterThanEqual: - dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpFUnordGreaterThanEqual: - dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i))); - break; - case spv::OpShiftRightLogical: - dst.move(i, lhs.UInt(i) >> rhs.UInt(i)); - break; - case spv::OpShiftRightArithmetic: - dst.move(i, lhs.Int(i) >> rhs.Int(i)); - break; - case spv::OpShiftLeftLogical: - dst.move(i, lhs.UInt(i) << rhs.UInt(i)); - break; - case spv::OpBitwiseOr: - case spv::OpLogicalOr: - dst.move(i, lhs.UInt(i) | rhs.UInt(i)); - break; - case spv::OpBitwiseXor: - dst.move(i, lhs.UInt(i) ^ rhs.UInt(i)); - break; - case spv::OpBitwiseAnd: - case spv::OpLogicalAnd: - dst.move(i, lhs.UInt(i) & rhs.UInt(i)); - break; - case spv::OpSMulExtended: - // Extended ops: result is a structure containing two members of the same type as lhs & rhs. - // In our flat view then, component i is the i'th component of the first member; - // component i + N is the i'th component of the second member. 
- dst.move(i, lhs.Int(i) * rhs.Int(i)); - dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i))); - break; - case spv::OpUMulExtended: - dst.move(i, lhs.UInt(i) * rhs.UInt(i)); - dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i))); - break; - case spv::OpIAddCarry: - dst.move(i, lhs.UInt(i) + rhs.UInt(i)); - dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31); - break; - case spv::OpISubBorrow: - dst.move(i, lhs.UInt(i) - rhs.UInt(i)); - dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31); - break; - default: - UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); + case spv::OpIAdd: + dst.move(i, lhs.Int(i) + rhs.Int(i)); + break; + case spv::OpISub: + dst.move(i, lhs.Int(i) - rhs.Int(i)); + break; + case spv::OpIMul: + dst.move(i, lhs.Int(i) * rhs.Int(i)); + break; + case spv::OpSDiv: + { + SIMD::Int a = lhs.Int(i); + SIMD::Int b = rhs.Int(i); + b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero + a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow + dst.move(i, a / b); + break; + } + case spv::OpUDiv: + { + auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0))); + dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask)); + break; + } + case spv::OpSRem: + { + SIMD::Int a = lhs.Int(i); + SIMD::Int b = rhs.Int(i); + b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero + a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow + dst.move(i, a % b); + break; + } + case spv::OpSMod: + { + SIMD::Int a = lhs.Int(i); + SIMD::Int b = rhs.Int(i); + b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero + a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow + auto mod = a % b; + // If a and b have opposite signs, the remainder operation takes + // the sign from a but OpSMod is supposed to take the sign of b. 
+ // Adding b will ensure that the result has the correct sign and + // that it is still congruent to a modulo b. + // + // See also http://mathforum.org/library/drmath/view/52343.html + auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0))); + auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff); + dst.move(i, As<SIMD::Float>(fixedMod)); + break; + } + case spv::OpUMod: + { + auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0))); + dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask)); + break; + } + case spv::OpIEqual: + case spv::OpLogicalEqual: + dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpINotEqual: + case spv::OpLogicalNotEqual: + dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpUGreaterThan: + dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i))); + break; + case spv::OpSGreaterThan: + dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpUGreaterThanEqual: + dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i))); + break; + case spv::OpSGreaterThanEqual: + dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpULessThan: + dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i))); + break; + case spv::OpSLessThan: + dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpULessThanEqual: + dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i))); + break; + case spv::OpSLessThanEqual: + dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpFAdd: + dst.move(i, lhs.Float(i) + rhs.Float(i)); + break; + case spv::OpFSub: + dst.move(i, lhs.Float(i) - rhs.Float(i)); + break; + case spv::OpFMul: + dst.move(i, lhs.Float(i) * rhs.Float(i)); + break; + case spv::OpFDiv: + dst.move(i, lhs.Float(i) / rhs.Float(i)); + break; + case spv::OpFMod: + // TODO(b/126873455): inaccurate for values greater than 2^24 + dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i))); + break; + case spv::OpFRem: + dst.move(i, lhs.Float(i) % rhs.Float(i)); + break; + case 
spv::OpFOrdEqual: + dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFUnordEqual: + dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFOrdNotEqual: + dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFUnordNotEqual: + dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFOrdLessThan: + dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFUnordLessThan: + dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFOrdGreaterThan: + dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFUnordGreaterThan: + dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFOrdLessThanEqual: + dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFUnordLessThanEqual: + dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFOrdGreaterThanEqual: + dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpFUnordGreaterThanEqual: + dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i))); + break; + case spv::OpShiftRightLogical: + dst.move(i, lhs.UInt(i) >> rhs.UInt(i)); + break; + case spv::OpShiftRightArithmetic: + dst.move(i, lhs.Int(i) >> rhs.Int(i)); + break; + case spv::OpShiftLeftLogical: + dst.move(i, lhs.UInt(i) << rhs.UInt(i)); + break; + case spv::OpBitwiseOr: + case spv::OpLogicalOr: + dst.move(i, lhs.UInt(i) | rhs.UInt(i)); + break; + case spv::OpBitwiseXor: + dst.move(i, lhs.UInt(i) ^ rhs.UInt(i)); + break; + case spv::OpBitwiseAnd: + case spv::OpLogicalAnd: + dst.move(i, lhs.UInt(i) & rhs.UInt(i)); + break; + case spv::OpSMulExtended: + // Extended ops: result is a structure containing two members of the same type as lhs & rhs. + // In our flat view then, component i is the i'th component of the first member; + // component i + N is the i'th component of the second member. 
+ dst.move(i, lhs.Int(i) * rhs.Int(i)); + dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i))); + break; + case spv::OpUMulExtended: + dst.move(i, lhs.UInt(i) * rhs.UInt(i)); + dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i))); + break; + case spv::OpIAddCarry: + dst.move(i, lhs.UInt(i) + rhs.UInt(i)); + dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31); + break; + case spv::OpISubBorrow: + dst.move(i, lhs.UInt(i) - rhs.UInt(i)); + dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31); + break; + default: + UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str()); } } @@ -532,7 +531,7 @@ return EmitResult::Continue; } -SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const +SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const { SIMD::Float d = x.Float(0) * y.Float(0);
diff --git a/src/Pipeline/SpirvShaderControlFlow.cpp b/src/Pipeline/SpirvShaderControlFlow.cpp index e7b1fd7..5f7a1b1 100644 --- a/src/Pipeline/SpirvShaderControlFlow.cpp +++ b/src/Pipeline/SpirvShaderControlFlow.cpp
@@ -14,7 +14,7 @@ #include "SpirvShader.hpp" -#include "Reactor/Coroutine.hpp" // rr::Yield +#include "Reactor/Coroutine.hpp" // rr::Yield #include "ShaderCore.hpp" @@ -24,7 +24,9 @@ namespace sw { -SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end) +SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) + : begin_(begin) + , end_(end) { // Default to a Simple, this may change later. kind = Block::Simple; @@ -111,7 +113,7 @@ } } -void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const +void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set &reachable) const { if(reachable.count(id) == 0) { @@ -156,8 +158,8 @@ auto block = getBlock(blockId); for(auto dep : block.ins) { - if(block.kind != Block::Loop || // if not a loop... - !ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge + if(block.kind != Block::Loop || // if not a loop... + !ExistsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge { f(dep); } @@ -196,7 +198,7 @@ void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask) { - auto edge = Block::Edge{from, to}; + auto edge = Block::Edge{ from, to }; auto it = edgeActiveLaneMasks.find(edge); if(it == edgeActiveLaneMasks.end()) { @@ -212,7 +214,7 @@ RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const { - auto edge = Block::Edge{from, to}; + auto edge = Block::Edge{ from, to }; auto it = state->edgeActiveLaneMasks.find(edge); ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value()); return it->second; @@ -239,8 +241,7 @@ // Ensure all dependency blocks have been generated. 
auto depsDone = true; - function.ForeachBlockDependency(id, [&](Block::ID dep) - { + function.ForeachBlockDependency(id, [&](Block::ID dep) { if(state->visited.count(dep) == 0) { state->pending->push_front(dep); @@ -287,7 +288,7 @@ if(!state->visited.emplace(blockId).second) { - return; // Already generated this block. + return; // Already generated this block. } if(blockId != function.entry) @@ -323,7 +324,7 @@ if(!state->visited.emplace(blockId).second) { - return; // Already emitted this loop. + return; // Already emitted this loop. } // Gather all the blocks that make up the loop. @@ -414,7 +415,7 @@ // Add active lanes to the merge lane mask. for(auto in : function.getBlock(mergeBlockId).ins) { - auto edge = Block::Edge{in, mergeBlockId}; + auto edge = Block::Edge{ in, mergeBlockId }; auto it = state->edgeActiveLaneMasks.find(edge); if(it != state->edgeActiveLaneMasks.end()) { @@ -563,9 +564,9 @@ SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const { auto functionId = Function::ID(insn.word(3)); - const auto& functionIt = functions.find(functionId); + const auto &functionIt = functions.find(functionId); ASSERT(functionIt != functions.end()); - auto& function = functionIt->second; + auto &function = functionIt->second; // TODO(b/141246700): Add full support for spv::OpFunctionCall // The only supported function is a single OpKill wrapped in a @@ -608,15 +609,15 @@ switch(executionScope) { - case spv::ScopeWorkgroup: - Yield(YieldResult::ControlBarrier); - break; - case spv::ScopeSubgroup: - break; - default: - // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module. - UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup"); - break; + case spv::ScopeWorkgroup: + Yield(YieldResult::ControlBarrier); + break; + case spv::ScopeSubgroup: + break; + default: + // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module. 
+ UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup"); + break; } return EmitResult::Continue; @@ -654,7 +655,7 @@ } } -void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const +void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const { auto typeId = Type::ID(insn.word(1)); auto type = getType(typeId); @@ -688,7 +689,7 @@ { if(semantics == spv::MemorySemanticsMaskNone) { - return; //no-op + return; //no-op } rr::Fence(MemoryOrder(semantics)); }
diff --git a/src/Pipeline/SpirvShaderEnumNames.cpp b/src/Pipeline/SpirvShaderEnumNames.cpp index 34cadd4..39a0bf0 100644 --- a/src/Pipeline/SpirvShaderEnumNames.cpp +++ b/src/Pipeline/SpirvShaderEnumNames.cpp
@@ -14,15 +14,15 @@ // This file contains code used to aid debugging. -#include <spirv/unified1/spirv.h> #include "SpirvShader.hpp" +#include <spirv/unified1/spirv.h> // Prototypes for SPIRV-Tools functions that do not have public headers. // This is a C++ function, so the name is mangled, and signature changes will // result in a linker error instead of runtime signature mismatches. // Gets the name of an instruction, without the "Op" prefix. -extern const char* spvOpcodeString(const SpvOp opcode); +extern const char *spvOpcodeString(const SpvOp opcode); namespace sw { @@ -31,4 +31,4 @@ return spvOpcodeString(static_cast<SpvOp>(op)); } -} // namespace sw +} // namespace sw
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp index 50ae6a5..f6aaeca 100644 --- a/src/Pipeline/SpirvShaderGLSLstd450.cpp +++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -16,12 +16,11 @@ #include "ShaderCore.hpp" -#include <spirv/unified1/spirv.hpp> #include <spirv/unified1/GLSL.std.450.h> +#include <spirv/unified1/spirv.hpp> -namespace -{ - constexpr float PI = 3.141592653589793f; +namespace { +constexpr float PI = 3.141592653589793f; } namespace sw { @@ -34,915 +33,925 @@ switch(extInstIndex) { - case GLSLstd450FAbs: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) + case GLSLstd450FAbs: { - dst.move(i, Abs(src.Float(i))); - } - break; - } - case GLSLstd450SAbs: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Abs(src.Int(i))); - } - break; - } - case GLSLstd450Cross: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2)); - dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0)); - dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1)); - break; - } - case GLSLstd450Floor: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Floor(src.Float(i))); - } - break; - } - case GLSLstd450Trunc: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Trunc(src.Float(i))); - } - break; - } - case GLSLstd450Ceil: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Ceil(src.Float(i))); - } - break; - } - case GLSLstd450Fract: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Frac(src.Float(i))); - } - break; - } - case GLSLstd450Round: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - 
dst.move(i, Round(src.Float(i))); - } - break; - } - case GLSLstd450RoundEven: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto x = Round(src.Float(i)); - // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src)); - dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) * - SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1))); - } - break; - } - case GLSLstd450FMin: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Min(lhs.Float(i), rhs.Float(i))); - } - break; - } - case GLSLstd450FMax: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Max(lhs.Float(i), rhs.Float(i))); - } - break; - } - case GLSLstd450SMin: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Min(lhs.Int(i), rhs.Int(i))); - } - break; - } - case GLSLstd450SMax: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Max(lhs.Int(i), rhs.Int(i))); - } - break; - } - case GLSLstd450UMin: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Min(lhs.UInt(i), rhs.UInt(i))); - } - break; - } - case GLSLstd450UMax: - { - auto lhs = GenericValue(this, state, insn.word(5)); - auto rhs = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < 
type.sizeInComponents; i++) - { - dst.move(i, Max(lhs.UInt(i), rhs.UInt(i))); - } - break; - } - case GLSLstd450Step: - { - auto edge = GenericValue(this, state, insn.word(5)); - auto x = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f))); - } - break; - } - case GLSLstd450SmoothStep: - { - auto edge0 = GenericValue(this, state, insn.word(5)); - auto edge1 = GenericValue(this, state, insn.word(6)); - auto x = GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto tx = Min(Max((x.Float(i) - edge0.Float(i)) / - (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f)); - dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx)); - } - break; - } - case GLSLstd450FMix: - { - auto x = GenericValue(this, state, insn.word(5)); - auto y = GenericValue(this, state, insn.word(6)); - auto a = GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i)); - } - break; - } - case GLSLstd450FClamp: - { - auto x = GenericValue(this, state, insn.word(5)); - auto minVal = GenericValue(this, state, insn.word(6)); - auto maxVal = GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i))); - } - break; - } - case GLSLstd450SClamp: - { - auto x = GenericValue(this, state, insn.word(5)); - auto minVal = GenericValue(this, state, insn.word(6)); - auto maxVal = GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i))); - } - break; - } - case GLSLstd450UClamp: - { - auto x = GenericValue(this, state, insn.word(5)); - auto minVal = GenericValue(this, state, insn.word(6)); - auto maxVal = 
GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i))); - } - break; - } - case GLSLstd450FSign: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f)); - auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f)); - dst.move(i, neg | pos); - } - break; - } - case GLSLstd450SSign: - { - auto src = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1); - auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1); - dst.move(i, neg | pos); - } - break; - } - case GLSLstd450Reflect: - { - auto I = GenericValue(this, state, insn.word(5)); - auto N = GenericValue(this, state, insn.word(6)); - - SIMD::Float d = Dot(type.sizeInComponents, I, N); - - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i)); - } - break; - } - case GLSLstd450Refract: - { - auto I = GenericValue(this, state, insn.word(5)); - auto N = GenericValue(this, state, insn.word(6)); - auto eta = GenericValue(this, state, insn.word(7)); - - SIMD::Float d = Dot(type.sizeInComponents, I, N); - SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d); - SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f)); - SIMD::Float t = (eta.Float(0) * d + Sqrt(k)); - - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i))); - } - break; - } - case GLSLstd450FaceForward: - { - auto N = GenericValue(this, state, insn.word(5)); - auto I = GenericValue(this, state, insn.word(6)); - auto Nref = GenericValue(this, state, insn.word(7)); - - SIMD::Float 
d = Dot(type.sizeInComponents, I, Nref); - SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f)); - - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto n = N.Float(i); - dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n))); - } - break; - } - case GLSLstd450Length: - { - auto x = GenericValue(this, state, insn.word(5)); - SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x); - - dst.move(0, Sqrt(d)); - break; - } - case GLSLstd450Normalize: - { - auto x = GenericValue(this, state, insn.word(5)); - SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x); - SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d); - - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, invLength * x.Float(i)); - } - break; - } - case GLSLstd450Distance: - { - auto p0 = GenericValue(this, state, insn.word(5)); - auto p1 = GenericValue(this, state, insn.word(6)); - auto p0Type = getType(p0.type); - - // sqrt(dot(p0-p1, p0-p1)) - SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0)); - - for(auto i = 1u; i < p0Type.sizeInComponents; i++) - { - d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i)); - } - - dst.move(0, Sqrt(d)); - break; - } - case GLSLstd450Modf: - { - auto val = GenericValue(this, state, insn.word(5)); - auto ptrId = Object::ID(insn.word(6)); - auto ptrTy = getType(getObject(ptrId).type); - auto ptr = GetPointerToData(ptrId, 0, state); - bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass); - // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed - // to be in bounds even for inactive lanes. - // - Clarify the SPIR-V spec. - // - Eliminate lane masking and assume interleaving. 
- auto robustness = OutOfBoundsBehavior::UndefinedBehavior; - - for(auto i = 0u; i < type.sizeInComponents; i++) - { - SIMD::Float whole, frac; - std::tie(whole, frac) = Modf(val.Float(i)); - dst.move(i, frac); - auto p = ptr + (i * sizeof(float)); - if(interleavedByLane) { p = InterleaveByLane(p); } - p.Store(whole, robustness, state->activeLaneMask()); - } - break; - } - case GLSLstd450ModfStruct: - { - auto val = GenericValue(this, state, insn.word(5)); - auto valTy = getType(val.type); - - for(auto i = 0u; i < valTy.sizeInComponents; i++) - { - SIMD::Float whole, frac; - std::tie(whole, frac) = Modf(val.Float(i)); - dst.move(i, frac); - dst.move(i + valTy.sizeInComponents, whole); - } - break; - } - case GLSLstd450PackSnorm4x8: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) | - ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) << 8) | - ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) << 16) | - ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) << 24)); - break; - } - case GLSLstd450PackUnorm4x8: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) | - ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) | - ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) | - ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24)); - break; - } - case GLSLstd450PackSnorm2x16: - { - auto val = GenericValue(this, 
state, insn.word(5)); - dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) & - SIMD::Int(0xFFFF)) | - ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) & - SIMD::Int(0xFFFF)) << 16)); - break; - } - case GLSLstd450PackUnorm2x16: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) & - SIMD::UInt(0xFFFF)) | - ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) & - SIMD::UInt(0xFFFF)) << 16)); - break; - } - case GLSLstd450PackHalf2x16: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true)); - break; - } - case GLSLstd450UnpackSnorm4x8: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - break; - } - case GLSLstd450UnpackUnorm4x8: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * 
SIMD::Float(1.0f / 255.f)); - dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f)); - break; - } - case GLSLstd450UnpackSnorm2x16: - { - auto val = GenericValue(this, state, insn.word(5)); - // clamp(f / 32767.0, -1.0, 1.0) - dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) * - SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)), - SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - break; - } - case GLSLstd450UnpackUnorm2x16: - { - auto val = GenericValue(this, state, insn.word(5)); - // f / 65535.0 - dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000))); - dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000))); - break; - } - case GLSLstd450UnpackHalf2x16: - { - auto val = GenericValue(this, state, insn.word(5)); - dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF))); - dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16)); - break; - } - case GLSLstd450Fma: - { - auto a = GenericValue(this, state, insn.word(5)); - auto b = GenericValue(this, state, insn.word(6)); - auto c = GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i))); - } - break; - } - case GLSLstd450Frexp: - { - auto val = GenericValue(this, state, insn.word(5)); - auto ptrId = Object::ID(insn.word(6)); - auto ptrTy = getType(getObject(ptrId).type); - auto ptr = GetPointerToData(ptrId, 0, state); - bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass); - // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed - // to be in bounds even for inactive lanes. - // - Clarify the SPIR-V spec. 
- // - Eliminate lane masking and assume interleaving. - auto robustness = OutOfBoundsBehavior::UndefinedBehavior; - - for(auto i = 0u; i < type.sizeInComponents; i++) - { - SIMD::Float significand; - SIMD::Int exponent; - std::tie(significand, exponent) = Frexp(val.Float(i)); - - dst.move(i, significand); - - auto p = ptr + (i * sizeof(float)); - if(interleavedByLane) { p = InterleaveByLane(p); } - p.Store(exponent, robustness, state->activeLaneMask()); - } - break; - } - case GLSLstd450FrexpStruct: - { - auto val = GenericValue(this, state, insn.word(5)); - auto numComponents = getType(val.type).sizeInComponents; - for(auto i = 0u; i < numComponents; i++) - { - auto significandAndExponent = Frexp(val.Float(i)); - dst.move(i, significandAndExponent.first); - dst.move(i + numComponents, significandAndExponent.second); - } - break; - } - case GLSLstd450Ldexp: - { - auto significand = GenericValue(this, state, insn.word(5)); - auto exponent = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - // Assumes IEEE 754 - auto in = significand.Float(i); - auto significandExponent = Exponent(in); - auto combinedExponent = exponent.Int(i) + significandExponent; - auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0))); - auto isSignificandInf = SIMD::UInt(IsInf(in)); - auto isSignificandNaN = SIMD::UInt(IsNan(in)); - auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126))); - auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128))); - auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge; - - SIMD::UInt v; - v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand. - v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent. - v &= isExponentInBounds; // Clear v if the exponent is OOB. - - v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit. 
- v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great. - - // If the input significand is zero, inf or nan, just return the - // input significand. - auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN; - v = (v & ~passthrough) | (significand.UInt(i) & passthrough); - - dst.move(i, As<SIMD::Float>(v)); - } - break; - } - case GLSLstd450Radians: - { - auto degrees = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f)); - } - break; - } - case GLSLstd450Degrees: - { - auto radians = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI)); - } - break; - } - case GLSLstd450Sin: - { - auto radians = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Sin(radians.Float(i))); - } - break; - } - case GLSLstd450Cos: - { - auto radians = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Cos(radians.Float(i))); - } - break; - } - case GLSLstd450Tan: - { - auto radians = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Tan(radians.Float(i))); - } - break; - } - case GLSLstd450Asin: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Asin(val.Float(i))); - } - break; - } - case GLSLstd450Acos: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Acos(val.Float(i))); - } - break; - } - case GLSLstd450Atan: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Atan(val.Float(i))); - } - break; - } - case GLSLstd450Sinh: 
- { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Sinh(val.Float(i))); - } - break; - } - case GLSLstd450Cosh: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Cosh(val.Float(i))); - } - break; - } - case GLSLstd450Tanh: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Tanh(val.Float(i))); - } - break; - } - case GLSLstd450Asinh: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Asinh(val.Float(i))); - } - break; - } - case GLSLstd450Acosh: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Acosh(val.Float(i))); - } - break; - } - case GLSLstd450Atanh: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Atanh(val.Float(i))); - } - break; - } - case GLSLstd450Atan2: - { - auto x = GenericValue(this, state, insn.word(5)); - auto y = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Atan2(x.Float(i), y.Float(i))); - } - break; - } - case GLSLstd450Pow: - { - auto x = GenericValue(this, state, insn.word(5)); - auto y = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Pow(x.Float(i), y.Float(i))); - } - break; - } - case GLSLstd450Exp: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Exp(val.Float(i))); - } - break; - } - case GLSLstd450Log: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Log(val.Float(i))); - } - break; - } - case 
GLSLstd450Exp2: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Exp2(val.Float(i))); - } - break; - } - case GLSLstd450Log2: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Log2(val.Float(i))); - } - break; - } - case GLSLstd450Sqrt: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, Sqrt(val.Float(i))); - } - break; - } - case GLSLstd450InverseSqrt: - { - auto val = GenericValue(this, state, insn.word(5)); - Decorations d; - ApplyDecorationsForId(&d, insn.word(5)); - if(d.RelaxedPrecision) - { + auto src = GenericValue(this, state, insn.word(5)); for(auto i = 0u; i < type.sizeInComponents; i++) { - dst.move(i, RcpSqrt_pp(val.Float(i))); + dst.move(i, Abs(src.Float(i))); } + break; } - else + case GLSLstd450SAbs: { + auto src = GenericValue(this, state, insn.word(5)); for(auto i = 0u; i < type.sizeInComponents; i++) { - dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i))); - } - } - break; - } - case GLSLstd450Determinant: - { - auto mat = GenericValue(this, state, insn.word(5)); - auto numComponents = getType(mat.type).sizeInComponents; - switch(numComponents) - { - case 4: // 2x2 - dst.move(0, Determinant( - mat.Float(0), mat.Float(1), - mat.Float(2), mat.Float(3))); - break; - case 9: // 3x3 - dst.move(0, Determinant( - mat.Float(0), mat.Float(1), mat.Float(2), - mat.Float(3), mat.Float(4), mat.Float(5), - mat.Float(6), mat.Float(7), mat.Float(8))); - break; - case 16: // 4x4 - dst.move(0, Determinant( - mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3), - mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7), - mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11), - mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15))); - break; - default: - UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. 
Got %d elements", int(numComponents)); - } - break; - } - case GLSLstd450MatrixInverse: - { - auto mat = GenericValue(this, state, insn.word(5)); - auto numComponents = getType(mat.type).sizeInComponents; - switch(numComponents) - { - case 4: // 2x2 - { - auto inv = MatrixInverse( - mat.Float(0), mat.Float(1), - mat.Float(2), mat.Float(3)); - for(uint32_t i = 0; i < inv.size(); i++) - { - dst.move(i, inv[i]); + dst.move(i, Abs(src.Int(i))); } break; } - case 9: // 3x3 + case GLSLstd450Cross: { - auto inv = MatrixInverse( - mat.Float(0), mat.Float(1), mat.Float(2), - mat.Float(3), mat.Float(4), mat.Float(5), - mat.Float(6), mat.Float(7), mat.Float(8)); - for(uint32_t i = 0; i < inv.size(); i++) + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, state, insn.word(6)); + dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2)); + dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0)); + dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1)); + break; + } + case GLSLstd450Floor: + { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) { - dst.move(i, inv[i]); + dst.move(i, Floor(src.Float(i))); } break; } - case 16: // 4x4 + case GLSLstd450Trunc: { - auto inv = MatrixInverse( - mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3), - mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7), - mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11), - mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)); - for(uint32_t i = 0; i < inv.size(); i++) + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) { - dst.move(i, inv[i]); + dst.move(i, Trunc(src.Float(i))); + } + break; + } + case GLSLstd450Ceil: + { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Ceil(src.Float(i))); + } + break; + } + case GLSLstd450Fract: 
+ { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Frac(src.Float(i))); + } + break; + } + case GLSLstd450Round: + { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Round(src.Float(i))); + } + break; + } + case GLSLstd450RoundEven: + { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto x = Round(src.Float(i)); + // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src)); + dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) * + SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1))); + } + break; + } + case GLSLstd450FMin: + { + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Min(lhs.Float(i), rhs.Float(i))); + } + break; + } + case GLSLstd450FMax: + { + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Max(lhs.Float(i), rhs.Float(i))); + } + break; + } + case GLSLstd450SMin: + { + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Min(lhs.Int(i), rhs.Int(i))); + } + break; + } + case GLSLstd450SMax: + { + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Max(lhs.Int(i), rhs.Int(i))); + } + break; + } + case GLSLstd450UMin: + { + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, 
state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Min(lhs.UInt(i), rhs.UInt(i))); + } + break; + } + case GLSLstd450UMax: + { + auto lhs = GenericValue(this, state, insn.word(5)); + auto rhs = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Max(lhs.UInt(i), rhs.UInt(i))); + } + break; + } + case GLSLstd450Step: + { + auto edge = GenericValue(this, state, insn.word(5)); + auto x = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f))); + } + break; + } + case GLSLstd450SmoothStep: + { + auto edge0 = GenericValue(this, state, insn.word(5)); + auto edge1 = GenericValue(this, state, insn.word(6)); + auto x = GenericValue(this, state, insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto tx = Min(Max((x.Float(i) - edge0.Float(i)) / + (edge1.Float(i) - edge0.Float(i)), + SIMD::Float(0.0f)), + SIMD::Float(1.0f)); + dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx)); + } + break; + } + case GLSLstd450FMix: + { + auto x = GenericValue(this, state, insn.word(5)); + auto y = GenericValue(this, state, insn.word(6)); + auto a = GenericValue(this, state, insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i)); + } + break; + } + case GLSLstd450FClamp: + { + auto x = GenericValue(this, state, insn.word(5)); + auto minVal = GenericValue(this, state, insn.word(6)); + auto maxVal = GenericValue(this, state, insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i))); + } + break; + } + case GLSLstd450SClamp: + { + auto x = GenericValue(this, state, insn.word(5)); + auto minVal = GenericValue(this, state, insn.word(6)); + auto maxVal = GenericValue(this, state, 
insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i))); + } + break; + } + case GLSLstd450UClamp: + { + auto x = GenericValue(this, state, insn.word(5)); + auto minVal = GenericValue(this, state, insn.word(6)); + auto maxVal = GenericValue(this, state, insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i))); + } + break; + } + case GLSLstd450FSign: + { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f)); + auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f)); + dst.move(i, neg | pos); + } + break; + } + case GLSLstd450SSign: + { + auto src = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1); + auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1); + dst.move(i, neg | pos); + } + break; + } + case GLSLstd450Reflect: + { + auto I = GenericValue(this, state, insn.word(5)); + auto N = GenericValue(this, state, insn.word(6)); + + SIMD::Float d = Dot(type.sizeInComponents, I, N); + + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i)); + } + break; + } + case GLSLstd450Refract: + { + auto I = GenericValue(this, state, insn.word(5)); + auto N = GenericValue(this, state, insn.word(6)); + auto eta = GenericValue(this, state, insn.word(7)); + + SIMD::Float d = Dot(type.sizeInComponents, I, N); + SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d); + SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f)); + SIMD::Float t = (eta.Float(0) * d + Sqrt(k)); + + for(auto i = 0u; i < type.sizeInComponents; i++) + { 
+ dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i))); + } + break; + } + case GLSLstd450FaceForward: + { + auto N = GenericValue(this, state, insn.word(5)); + auto I = GenericValue(this, state, insn.word(6)); + auto Nref = GenericValue(this, state, insn.word(7)); + + SIMD::Float d = Dot(type.sizeInComponents, I, Nref); + SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f)); + + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto n = N.Float(i); + dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n))); + } + break; + } + case GLSLstd450Length: + { + auto x = GenericValue(this, state, insn.word(5)); + SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x); + + dst.move(0, Sqrt(d)); + break; + } + case GLSLstd450Normalize: + { + auto x = GenericValue(this, state, insn.word(5)); + SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x); + SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d); + + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, invLength * x.Float(i)); + } + break; + } + case GLSLstd450Distance: + { + auto p0 = GenericValue(this, state, insn.word(5)); + auto p1 = GenericValue(this, state, insn.word(6)); + auto p0Type = getType(p0.type); + + // sqrt(dot(p0-p1, p0-p1)) + SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0)); + + for(auto i = 1u; i < p0Type.sizeInComponents; i++) + { + d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i)); + } + + dst.move(0, Sqrt(d)); + break; + } + case GLSLstd450Modf: + { + auto val = GenericValue(this, state, insn.word(5)); + auto ptrId = Object::ID(insn.word(6)); + auto ptrTy = getType(getObject(ptrId).type); + auto ptr = GetPointerToData(ptrId, 0, state); + bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass); + // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed + // to be in bounds even for inactive lanes. 
+ // - Clarify the SPIR-V spec. + // - Eliminate lane masking and assume interleaving. + auto robustness = OutOfBoundsBehavior::UndefinedBehavior; + + for(auto i = 0u; i < type.sizeInComponents; i++) + { + SIMD::Float whole, frac; + std::tie(whole, frac) = Modf(val.Float(i)); + dst.move(i, frac); + auto p = ptr + (i * sizeof(float)); + if(interleavedByLane) { p = InterleaveByLane(p); } + p.Store(whole, robustness, state->activeLaneMask()); + } + break; + } + case GLSLstd450ModfStruct: + { + auto val = GenericValue(this, state, insn.word(5)); + auto valTy = getType(val.type); + + for(auto i = 0u; i < valTy.sizeInComponents; i++) + { + SIMD::Float whole, frac; + std::tie(whole, frac) = Modf(val.Float(i)); + dst.move(i, frac); + dst.move(i + valTy.sizeInComponents, whole); + } + break; + } + case GLSLstd450PackSnorm4x8: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) | + ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) + << 8) | + ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) + << 16) | + ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) + << 24)); + break; + } + case GLSLstd450PackUnorm4x8: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) | + ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) | + ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) | + ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) 
<< 24)); + break; + } + case GLSLstd450PackSnorm2x16: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) & + SIMD::Int(0xFFFF)) | + ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) & + SIMD::Int(0xFFFF)) + << 16)); + break; + } + case GLSLstd450PackUnorm2x16: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) & + SIMD::UInt(0xFFFF)) | + ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) & + SIMD::UInt(0xFFFF)) + << 16)); + break; + } + case GLSLstd450PackHalf2x16: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true)); + break; + } + case GLSLstd450UnpackSnorm4x8: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + break; + } + case GLSLstd450UnpackUnorm4x8: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * 
SIMD::Float(1.0f / 255.f)); + dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f)); + break; + } + case GLSLstd450UnpackSnorm2x16: + { + auto val = GenericValue(this, state, insn.word(5)); + // clamp(f / 32767.0, -1.0, 1.0) + dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) * + SIMD::Float(1.0f / float(0x7FFF0000)), + SIMD::Float(-1.0f)), + SIMD::Float(1.0f))); + dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)), + SIMD::Float(-1.0f)), + SIMD::Float(1.0f))); + break; + } + case GLSLstd450UnpackUnorm2x16: + { + auto val = GenericValue(this, state, insn.word(5)); + // f / 65535.0 + dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000))); + dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000))); + break; + } + case GLSLstd450UnpackHalf2x16: + { + auto val = GenericValue(this, state, insn.word(5)); + dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF))); + dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16)); + break; + } + case GLSLstd450Fma: + { + auto a = GenericValue(this, state, insn.word(5)); + auto b = GenericValue(this, state, insn.word(6)); + auto c = GenericValue(this, state, insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i))); + } + break; + } + case GLSLstd450Frexp: + { + auto val = GenericValue(this, state, insn.word(5)); + auto ptrId = Object::ID(insn.word(6)); + auto ptrTy = getType(getObject(ptrId).type); + auto ptr = GetPointerToData(ptrId, 0, state); + bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass); + // TODO: GLSL frexp() takes an output parameter and thus the 
pointer is assumed + // to be in bounds even for inactive lanes. + // - Clarify the SPIR-V spec. + // - Eliminate lane masking and assume interleaving. + auto robustness = OutOfBoundsBehavior::UndefinedBehavior; + + for(auto i = 0u; i < type.sizeInComponents; i++) + { + SIMD::Float significand; + SIMD::Int exponent; + std::tie(significand, exponent) = Frexp(val.Float(i)); + + dst.move(i, significand); + + auto p = ptr + (i * sizeof(float)); + if(interleavedByLane) { p = InterleaveByLane(p); } + p.Store(exponent, robustness, state->activeLaneMask()); + } + break; + } + case GLSLstd450FrexpStruct: + { + auto val = GenericValue(this, state, insn.word(5)); + auto numComponents = getType(val.type).sizeInComponents; + for(auto i = 0u; i < numComponents; i++) + { + auto significandAndExponent = Frexp(val.Float(i)); + dst.move(i, significandAndExponent.first); + dst.move(i + numComponents, significandAndExponent.second); + } + break; + } + case GLSLstd450Ldexp: + { + auto significand = GenericValue(this, state, insn.word(5)); + auto exponent = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + // Assumes IEEE 754 + auto in = significand.Float(i); + auto significandExponent = Exponent(in); + auto combinedExponent = exponent.Int(i) + significandExponent; + auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0))); + auto isSignificandInf = SIMD::UInt(IsInf(in)); + auto isSignificandNaN = SIMD::UInt(IsNan(in)); + auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126))); + auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128))); + auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge; + + SIMD::UInt v; + v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand. + v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent. + v &= isExponentInBounds; // Clear v if the exponent is OOB. 
+ + v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit. + v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great. + + // If the input significand is zero, inf or nan, just return the + // input significand. + auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN; + v = (v & ~passthrough) | (significand.UInt(i) & passthrough); + + dst.move(i, As<SIMD::Float>(v)); + } + break; + } + case GLSLstd450Radians: + { + auto degrees = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f)); + } + break; + } + case GLSLstd450Degrees: + { + auto radians = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI)); + } + break; + } + case GLSLstd450Sin: + { + auto radians = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Sin(radians.Float(i))); + } + break; + } + case GLSLstd450Cos: + { + auto radians = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Cos(radians.Float(i))); + } + break; + } + case GLSLstd450Tan: + { + auto radians = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Tan(radians.Float(i))); + } + break; + } + case GLSLstd450Asin: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Asin(val.Float(i))); + } + break; + } + case GLSLstd450Acos: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Acos(val.Float(i))); + } + break; + } + case GLSLstd450Atan: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + 
dst.move(i, Atan(val.Float(i))); + } + break; + } + case GLSLstd450Sinh: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Sinh(val.Float(i))); + } + break; + } + case GLSLstd450Cosh: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Cosh(val.Float(i))); + } + break; + } + case GLSLstd450Tanh: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Tanh(val.Float(i))); + } + break; + } + case GLSLstd450Asinh: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Asinh(val.Float(i))); + } + break; + } + case GLSLstd450Acosh: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Acosh(val.Float(i))); + } + break; + } + case GLSLstd450Atanh: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Atanh(val.Float(i))); + } + break; + } + case GLSLstd450Atan2: + { + auto x = GenericValue(this, state, insn.word(5)); + auto y = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Atan2(x.Float(i), y.Float(i))); + } + break; + } + case GLSLstd450Pow: + { + auto x = GenericValue(this, state, insn.word(5)); + auto y = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Pow(x.Float(i), y.Float(i))); + } + break; + } + case GLSLstd450Exp: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Exp(val.Float(i))); + } + break; + } + case GLSLstd450Log: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + 
{ + dst.move(i, Log(val.Float(i))); + } + break; + } + case GLSLstd450Exp2: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Exp2(val.Float(i))); + } + break; + } + case GLSLstd450Log2: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Log2(val.Float(i))); + } + break; + } + case GLSLstd450Sqrt: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, Sqrt(val.Float(i))); + } + break; + } + case GLSLstd450InverseSqrt: + { + auto val = GenericValue(this, state, insn.word(5)); + Decorations d; + ApplyDecorationsForId(&d, insn.word(5)); + if(d.RelaxedPrecision) + { + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, RcpSqrt_pp(val.Float(i))); + } + } + else + { + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i))); + } + } + break; + } + case GLSLstd450Determinant: + { + auto mat = GenericValue(this, state, insn.word(5)); + auto numComponents = getType(mat.type).sizeInComponents; + switch(numComponents) + { + case 4: // 2x2 + dst.move(0, Determinant( + mat.Float(0), mat.Float(1), + mat.Float(2), mat.Float(3))); + break; + case 9: // 3x3 + dst.move(0, Determinant( + mat.Float(0), mat.Float(1), mat.Float(2), + mat.Float(3), mat.Float(4), mat.Float(5), + mat.Float(6), mat.Float(7), mat.Float(8))); + break; + case 16: // 4x4 + dst.move(0, Determinant( + mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3), + mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7), + mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11), + mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15))); + break; + default: + UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. 
Got %d elements", int(numComponents)); + } + break; + } + case GLSLstd450MatrixInverse: + { + auto mat = GenericValue(this, state, insn.word(5)); + auto numComponents = getType(mat.type).sizeInComponents; + switch(numComponents) + { + case 4: // 2x2 + { + auto inv = MatrixInverse( + mat.Float(0), mat.Float(1), + mat.Float(2), mat.Float(3)); + for(uint32_t i = 0; i < inv.size(); i++) + { + dst.move(i, inv[i]); + } + break; + } + case 9: // 3x3 + { + auto inv = MatrixInverse( + mat.Float(0), mat.Float(1), mat.Float(2), + mat.Float(3), mat.Float(4), mat.Float(5), + mat.Float(6), mat.Float(7), mat.Float(8)); + for(uint32_t i = 0; i < inv.size(); i++) + { + dst.move(i, inv[i]); + } + break; + } + case 16: // 4x4 + { + auto inv = MatrixInverse( + mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3), + mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7), + mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11), + mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)); + for(uint32_t i = 0; i < inv.size(); i++) + { + dst.move(i, inv[i]); + } + break; + } + default: + UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. 
Got %d elements", int(numComponents)); + } + break; + } + case GLSLstd450IMix: + { + UNREACHABLE("GLSLstd450IMix has been removed from the specification"); + break; + } + case GLSLstd450PackDouble2x32: + { + UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)"); + break; + } + case GLSLstd450UnpackDouble2x32: + { + UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)"); + break; + } + case GLSLstd450FindILsb: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto v = val.UInt(i); + dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0))); + } + break; + } + case GLSLstd450FindSMsb: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0))); + dst.move(i, SIMD::UInt(31) - Ctlz(v, false)); + } + break; + } + case GLSLstd450FindUMsb: + { + auto val = GenericValue(this, state, insn.word(5)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false)); + } + break; + } + case GLSLstd450InterpolateAtCentroid: + { + UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)"); + break; + } + case GLSLstd450InterpolateAtSample: + { + UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)"); + break; + } + case GLSLstd450InterpolateAtOffset: + { + UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)"); + break; + } + case GLSLstd450NMin: + { + auto x = GenericValue(this, state, insn.word(5)); + auto y = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, NMin(x.Float(i), y.Float(i))); + } + break; + } + case GLSLstd450NMax: + { + auto x = GenericValue(this, state, insn.word(5)); + auto y = GenericValue(this, state, insn.word(6)); + for(auto i = 0u; i < 
type.sizeInComponents; i++) + { + dst.move(i, NMax(x.Float(i), y.Float(i))); + } + break; + } + case GLSLstd450NClamp: + { + auto x = GenericValue(this, state, insn.word(5)); + auto minVal = GenericValue(this, state, insn.word(6)); + auto maxVal = GenericValue(this, state, insn.word(7)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i)); + dst.move(i, clamp); } break; } default: - UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents)); - } - break; - } - case GLSLstd450IMix: - { - UNREACHABLE("GLSLstd450IMix has been removed from the specification"); - break; - } - case GLSLstd450PackDouble2x32: - { - UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)"); - break; - } - case GLSLstd450UnpackDouble2x32: - { - UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)"); - break; - } - case GLSLstd450FindILsb: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto v = val.UInt(i); - dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0))); - } - break; - } - case GLSLstd450FindSMsb: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0))); - dst.move(i, SIMD::UInt(31) - Ctlz(v, false)); - } - break; - } - case GLSLstd450FindUMsb: - { - auto val = GenericValue(this, state, insn.word(5)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false)); - } - break; - } - case GLSLstd450InterpolateAtCentroid: - { - UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)"); - break; - } - case GLSLstd450InterpolateAtSample: - { - UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)"); - break; - } - case 
GLSLstd450InterpolateAtOffset: - { - UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)"); - break; - } - case GLSLstd450NMin: - { - auto x = GenericValue(this, state, insn.word(5)); - auto y = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, NMin(x.Float(i), y.Float(i))); - } - break; - } - case GLSLstd450NMax: - { - auto x = GenericValue(this, state, insn.word(5)); - auto y = GenericValue(this, state, insn.word(6)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, NMax(x.Float(i), y.Float(i))); - } - break; - } - case GLSLstd450NClamp: - { - auto x = GenericValue(this, state, insn.word(5)); - auto minVal = GenericValue(this, state, insn.word(6)); - auto maxVal = GenericValue(this, state, insn.word(7)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i)); - dst.move(i, clamp); - } - break; - } - default: - UNREACHABLE("ExtInst %d", int(extInstIndex)); - break; + UNREACHABLE("ExtInst %d", int(extInstIndex)); + break; } return EmitResult::Continue;
diff --git a/src/Pipeline/SpirvShaderGroup.cpp b/src/Pipeline/SpirvShaderGroup.cpp index cc46949..77b73ea 100644 --- a/src/Pipeline/SpirvShaderGroup.cpp +++ b/src/Pipeline/SpirvShaderGroup.cpp
@@ -18,20 +18,21 @@ namespace sw { -struct SpirvShader::GroupOps { +struct SpirvShader::GroupOps +{ // Template function to perform a binary operation. // |TYPE| should be the type of the identity value (as an SIMD::<Type>). // |APPLY| should be a callable object that takes two RValue<TYPE> parameters // and returns a new RValue<TYPE> corresponding to the operation's result. - template <typename TYPE, typename APPLY> + template<typename TYPE, typename APPLY> static void BinaryOperation( - const SpirvShader* shader, - const SpirvShader::InsnIterator& insn, - const SpirvShader::EmitState* state, - Intermediate& dst, - const TYPE& identity, - APPLY&& apply) + const SpirvShader *shader, + const SpirvShader::InsnIterator &insn, + const SpirvShader::EmitState *state, + Intermediate &dst, + const TYPE &identity, + APPLY &&apply) { SpirvShader::GenericValue value(shader, state, insn.word(5)); auto &type = shader->getType(SpirvShader::Type::ID(insn.word(1))); @@ -42,40 +43,38 @@ TYPE v = As<TYPE>(v_uint); switch(spv::GroupOperation(insn.word(4))) { - case spv::GroupOperationReduce: - { - // NOTE: floating-point add and multiply are not really commutative so - // ensure that all values in the final lanes are identical - TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw] - TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw] - dst.move(i, v3); - break; - } - case spv::GroupOperationInclusiveScan: - { - TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] - TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] - dst.move(i, v3); - break; - } - case spv::GroupOperationExclusiveScan: - { - TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] - TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] - auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] 
[x] [xy] [xyz] - dst.move(i, v4); - break; - } - default: - UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d", - SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4)); + case spv::GroupOperationReduce: + { + // NOTE: floating-point add and multiply are not really commutative so + // ensure that all values in the final lanes are identical + TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw] + TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw] + dst.move(i, v3); + break; + } + case spv::GroupOperationInclusiveScan: + { + TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] + TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] + dst.move(i, v3); + break; + } + case spv::GroupOperationExclusiveScan: + { + TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] + TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] + auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz] + dst.move(i, v4); + break; + } + default: + UNIMPLEMENTED("EmitGroupNonUniform op: %s Group operation: %d", + SpirvShader::OpcodeName(type.opcode()).c_str(), insn.word(4)); } } } - }; - SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const { static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4"); @@ -89,420 +88,404 @@ switch(insn.opcode()) { - case spv::OpGroupNonUniformElect: - { - // Result is true only in the active invocation with the lowest id - // in the group, otherwise result is false. 
- SIMD::Int active = state->activeLaneMask(); - // TODO: Would be nice if we could write this as: - // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx) - auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx)); - dst.move(0, elect); - break; - } - - case spv::OpGroupNonUniformAll: - { - GenericValue predicate(this, state, insn.word(4)); - dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask()))); - break; - } - - case spv::OpGroupNonUniformAny: - { - GenericValue predicate(this, state, insn.word(4)); - dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask()))); - break; - } - - case spv::OpGroupNonUniformAllEqual: - { - GenericValue value(this, state, insn.word(4)); - auto res = SIMD::UInt(0xffffffff); - SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask()); - SIMD::UInt inactive = ~active; - for(auto i = 0u; i < type.sizeInComponents; i++) + case spv::OpGroupNonUniformElect: { - SIMD::UInt v = value.UInt(i) & active; - SIMD::UInt filled = v; - for(int j = 0; j < SIMD::Width - 1; j++) + // Result is true only in the active invocation with the lowest id + // in the group, otherwise result is false. 
+ SIMD::Int active = state->activeLaneMask(); + // TODO: Would be nice if we could write this as: + // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx) + auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx)); + dst.move(0, elect); + break; + } + + case spv::OpGroupNonUniformAll: + { + GenericValue predicate(this, state, insn.word(4)); + dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask()))); + break; + } + + case spv::OpGroupNonUniformAny: + { + GenericValue predicate(this, state, insn.word(4)); + dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask()))); + break; + } + + case spv::OpGroupNonUniformAllEqual: + { + GenericValue value(this, state, insn.word(4)); + auto res = SIMD::UInt(0xffffffff); + SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask()); + SIMD::UInt inactive = ~active; + for(auto i = 0u; i < type.sizeInComponents; i++) { - filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value + SIMD::UInt v = value.UInt(i) & active; + SIMD::UInt filled = v; + for(int j = 0; j < SIMD::Width - 1; j++) + { + filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value + } + res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx)); } - res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx)); - } - dst.move(0, res); - break; - } - - case spv::OpGroupNonUniformBroadcast: - { - auto valueId = Object::ID(insn.word(4)); - auto id = SIMD::Int(GetConstScalarInt(insn.word(5))); - GenericValue value(this, state, valueId); - auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, OrAll(value.Int(i) & mask)); - } - break; - } - - case spv::OpGroupNonUniformBroadcastFirst: - { - auto valueId = Object::ID(insn.word(4)); - GenericValue value(this, state, valueId); - // Result is true only in the active invocation with the lowest id - // in the 
group, otherwise result is false. - SIMD::Int active = state->activeLaneMask(); - // TODO: Would be nice if we could write this as: - // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx) - auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - dst.move(i, OrAll(value.Int(i) & elect)); - } - break; - } - - case spv::OpGroupNonUniformBallot: - { - ASSERT(type.sizeInComponents == 4); - GenericValue predicate(this, state, insn.word(4)); - dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0)))); - dst.move(1, SIMD::Int(0)); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(0)); - break; - } - - case spv::OpGroupNonUniformInverseBallot: - { - auto valueId = Object::ID(insn.word(4)); - ASSERT(type.sizeInComponents == 1); - ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); - GenericValue value(this, state, valueId); - auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1); - dst.move(0, -bit); - break; - } - - case spv::OpGroupNonUniformBallotBitExtract: - { - auto valueId = Object::ID(insn.word(4)); - auto indexId = Object::ID(insn.word(5)); - ASSERT(type.sizeInComponents == 1); - ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); - ASSERT(getType(getObject(indexId).type).sizeInComponents == 1); - GenericValue value(this, state, valueId); - GenericValue index(this, state, indexId); - auto vecIdx = index.Int(0) / SIMD::Int(32); - auto bitIdx = index.Int(0) & SIMD::Int(31); - auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) | - (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) | - (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) | - (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3))); - dst.move(0, -((bits >> bitIdx) & SIMD::Int(1))); - break; - } - - case spv::OpGroupNonUniformBallotBitCount: - { - auto operation = spv::GroupOperation(insn.word(4)); - auto valueId = 
Object::ID(insn.word(5)); - ASSERT(type.sizeInComponents == 1); - ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); - GenericValue value(this, state, valueId); - switch(operation) - { - case spv::GroupOperationReduce: - dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15))); + dst.move(0, res); break; - case spv::GroupOperationInclusiveScan: - dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15))); + } + + case spv::OpGroupNonUniformBroadcast: + { + auto valueId = Object::ID(insn.word(4)); + auto id = SIMD::Int(GetConstScalarInt(insn.word(5))); + GenericValue value(this, state, valueId); + auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, OrAll(value.Int(i) & mask)); + } break; - case spv::GroupOperationExclusiveScan: - dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7))); + } + + case spv::OpGroupNonUniformBroadcastFirst: + { + auto valueId = Object::ID(insn.word(4)); + GenericValue value(this, state, valueId); + // Result is true only in the active invocation with the lowest id + // in the group, otherwise result is false. 
+ SIMD::Int active = state->activeLaneMask(); + // TODO: Would be nice if we could write this as: + // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx) + auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + dst.move(i, OrAll(value.Int(i) & elect)); + } break; + } + + case spv::OpGroupNonUniformBallot: + { + ASSERT(type.sizeInComponents == 4); + GenericValue predicate(this, state, insn.word(4)); + dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0)))); + dst.move(1, SIMD::Int(0)); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(0)); + break; + } + + case spv::OpGroupNonUniformInverseBallot: + { + auto valueId = Object::ID(insn.word(4)); + ASSERT(type.sizeInComponents == 1); + ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); + GenericValue value(this, state, valueId); + auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1); + dst.move(0, -bit); + break; + } + + case spv::OpGroupNonUniformBallotBitExtract: + { + auto valueId = Object::ID(insn.word(4)); + auto indexId = Object::ID(insn.word(5)); + ASSERT(type.sizeInComponents == 1); + ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); + ASSERT(getType(getObject(indexId).type).sizeInComponents == 1); + GenericValue value(this, state, valueId); + GenericValue index(this, state, indexId); + auto vecIdx = index.Int(0) / SIMD::Int(32); + auto bitIdx = index.Int(0) & SIMD::Int(31); + auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) | + (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) | + (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) | + (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3))); + dst.move(0, -((bits >> bitIdx) & SIMD::Int(1))); + break; + } + + case spv::OpGroupNonUniformBallotBitCount: + { + auto operation = spv::GroupOperation(insn.word(4)); + auto valueId = Object::ID(insn.word(5)); + 
ASSERT(type.sizeInComponents == 1); + ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); + GenericValue value(this, state, valueId); + switch(operation) + { + case spv::GroupOperationReduce: + dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15))); + break; + case spv::GroupOperationInclusiveScan: + dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15))); + break; + case spv::GroupOperationExclusiveScan: + dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7))); + break; + default: + UNSUPPORTED("GroupOperation %d", int(operation)); + } + break; + } + + case spv::OpGroupNonUniformBallotFindLSB: + { + auto valueId = Object::ID(insn.word(4)); + ASSERT(type.sizeInComponents == 1); + ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); + GenericValue value(this, state, valueId); + dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true)); + break; + } + + case spv::OpGroupNonUniformBallotFindMSB: + { + auto valueId = Object::ID(insn.word(4)); + ASSERT(type.sizeInComponents == 1); + ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); + GenericValue value(this, state, valueId); + dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false)); + break; + } + + case spv::OpGroupNonUniformShuffle: + { + GenericValue value(this, state, insn.word(4)); + GenericValue id(this, state, insn.word(5)); + auto x = CmpEQ(SIMD::Int(0), id.Int(0)); + auto y = CmpEQ(SIMD::Int(1), id.Int(0)); + auto z = CmpEQ(SIMD::Int(2), id.Int(0)); + auto w = CmpEQ(SIMD::Int(3), id.Int(0)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + SIMD::Int v = value.Int(i); + dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww)); + } + break; + } + + case spv::OpGroupNonUniformShuffleXor: + { + GenericValue value(this, state, insn.word(4)); + GenericValue mask(this, state, insn.word(5)); + auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); + auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); + 
auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); + auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + SIMD::Int v = value.Int(i); + dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww)); + } + break; + } + + case spv::OpGroupNonUniformShuffleUp: + { + GenericValue value(this, state, insn.word(4)); + GenericValue delta(this, state, insn.word(5)); + auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0)); + auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0)); + auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0)); + auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + SIMD::Int v = value.Int(i); + dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx)); + } + break; + } + + case spv::OpGroupNonUniformShuffleDown: + { + GenericValue value(this, state, insn.word(4)); + GenericValue delta(this, state, insn.word(5)); + auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0)); + auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0)); + auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0)); + auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0)); + for(auto i = 0u; i < type.sizeInComponents; i++) + { + SIMD::Int v = value.Int(i); + dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww)); + } + break; + } + + case spv::OpGroupNonUniformIAdd: + { + using Type = SIMD::Int; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0), + [](RValue<Type> a, RValue<Type> b) { return a + b; }); + break; + } + + case spv::OpGroupNonUniformFAdd: + { + using Type = SIMD::Float; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0.), + [](RValue<Type> a, RValue<Type> b) { return a + b; }); + break; + } + + case spv::OpGroupNonUniformIMul: + { + using Type = SIMD::Int; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(1), + [](RValue<Type> a, RValue<Type> b) { return a * b; }); + break; + } + + case 
spv::OpGroupNonUniformFMul: + { + using Type = SIMD::Float; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(1.), + [](RValue<Type> a, RValue<Type> b) { return a * b; }); + break; + } + + case spv::OpGroupNonUniformBitwiseAnd: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(~0u), + [](RValue<Type> a, RValue<Type> b) { return a & b; }); + break; + } + + case spv::OpGroupNonUniformBitwiseOr: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0), + [](RValue<Type> a, RValue<Type> b) { return a | b; }); + break; + } + + case spv::OpGroupNonUniformBitwiseXor: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0), + [](RValue<Type> a, RValue<Type> b) { return a ^ b; }); + break; + } + + case spv::OpGroupNonUniformSMin: + { + using Type = SIMD::Int; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(INT32_MAX), + [](RValue<Type> a, RValue<Type> b) { return Min(a, b); }); + break; + } + + case spv::OpGroupNonUniformUMin: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(~0u), + [](RValue<Type> a, RValue<Type> b) { return Min(a, b); }); + break; + } + + case spv::OpGroupNonUniformFMin: + { + using Type = SIMD::Float; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type::positive_inf(), + [](RValue<Type> a, RValue<Type> b) { return NMin(a, b); }); + break; + } + + case spv::OpGroupNonUniformSMax: + { + using Type = SIMD::Int; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(INT32_MIN), + [](RValue<Type> a, RValue<Type> b) { return Max(a, b); }); + break; + } + + case spv::OpGroupNonUniformUMax: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0), + [](RValue<Type> a, RValue<Type> b) { return Max(a, b); 
}); + break; + } + + case spv::OpGroupNonUniformFMax: + { + using Type = SIMD::Float; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type::negative_inf(), + [](RValue<Type> a, RValue<Type> b) { return NMax(a, b); }); + break; + } + + case spv::OpGroupNonUniformLogicalAnd: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(~0u), + [](RValue<Type> a, RValue<Type> b) { + SIMD::UInt zero = SIMD::UInt(0); + return CmpNEQ(a, zero) & CmpNEQ(b, zero); + }); + break; + } + + case spv::OpGroupNonUniformLogicalOr: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0), + [](RValue<Type> a, RValue<Type> b) { + SIMD::UInt zero = SIMD::UInt(0); + return CmpNEQ(a, zero) | CmpNEQ(b, zero); + }); + break; + } + + case spv::OpGroupNonUniformLogicalXor: + { + using Type = SIMD::UInt; + SpirvShader::GroupOps::BinaryOperation( + this, insn, state, dst, + Type(0), + [](RValue<Type> a, RValue<Type> b) { + SIMD::UInt zero = SIMD::UInt(0); + return CmpNEQ(a, zero) ^ CmpNEQ(b, zero); + }); + break; + } + default: - UNSUPPORTED("GroupOperation %d", int(operation)); - } - break; - } - - case spv::OpGroupNonUniformBallotFindLSB: - { - auto valueId = Object::ID(insn.word(4)); - ASSERT(type.sizeInComponents == 1); - ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); - GenericValue value(this, state, valueId); - dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true)); - break; - } - - case spv::OpGroupNonUniformBallotFindMSB: - { - auto valueId = Object::ID(insn.word(4)); - ASSERT(type.sizeInComponents == 1); - ASSERT(getType(getObject(valueId).type).sizeInComponents == 4); - GenericValue value(this, state, valueId); - dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false)); - break; - } - - case spv::OpGroupNonUniformShuffle: - { - GenericValue value(this, state, insn.word(4)); - GenericValue id(this, state, insn.word(5)); - auto x = 
CmpEQ(SIMD::Int(0), id.Int(0)); - auto y = CmpEQ(SIMD::Int(1), id.Int(0)); - auto z = CmpEQ(SIMD::Int(2), id.Int(0)); - auto w = CmpEQ(SIMD::Int(3), id.Int(0)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - SIMD::Int v = value.Int(i); - dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww)); - } - break; - } - - case spv::OpGroupNonUniformShuffleXor: - { - GenericValue value(this, state, insn.word(4)); - GenericValue mask(this, state, insn.word(5)); - auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); - auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); - auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); - auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - SIMD::Int v = value.Int(i); - dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww)); - } - break; - } - - case spv::OpGroupNonUniformShuffleUp: - { - GenericValue value(this, state, insn.word(4)); - GenericValue delta(this, state, insn.word(5)); - auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0)); - auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0)); - auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0)); - auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - SIMD::Int v = value.Int(i); - dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx)); - } - break; - } - - case spv::OpGroupNonUniformShuffleDown: - { - GenericValue value(this, state, insn.word(4)); - GenericValue delta(this, state, insn.word(5)); - auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0)); - auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0)); - auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0)); - auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0)); - for(auto i = 0u; i < type.sizeInComponents; i++) - { - SIMD::Int v = value.Int(i); - dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww)); - } - break; - } - - case spv::OpGroupNonUniformIAdd: - { 
- using Type = SIMD::Int; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0), - [](RValue<Type>a, RValue<Type>b){ return a + b; } - ); - break; - } - - case spv::OpGroupNonUniformFAdd: - { - using Type = SIMD::Float; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0.), - [](RValue<Type>a, RValue<Type>b){ return a + b; } - ); - break; - } - - case spv::OpGroupNonUniformIMul: - { - using Type = SIMD::Int; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(1), - [](RValue<Type>a, RValue<Type>b){ return a * b; } - ); - break; - } - - case spv::OpGroupNonUniformFMul: - { - using Type = SIMD::Float; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(1.), - [](RValue<Type>a, RValue<Type>b){ return a * b; } - ); - break; - } - - case spv::OpGroupNonUniformBitwiseAnd: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(~0u), - [](RValue<Type>a, RValue<Type>b){ return a & b; } - ); - break; - } - - case spv::OpGroupNonUniformBitwiseOr: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0), - [](RValue<Type>a, RValue<Type>b){ return a | b; } - ); - break; - } - - case spv::OpGroupNonUniformBitwiseXor: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0), - [](RValue<Type>a, RValue<Type>b){ return a ^ b; } - ); - break; - } - - case spv::OpGroupNonUniformSMin: - { - using Type = SIMD::Int; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(INT32_MAX), - [](RValue<Type>a, RValue<Type>b){ return Min(a, b); } - ); - break; - } - - case spv::OpGroupNonUniformUMin: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(~0u), - [](RValue<Type>a, RValue<Type>b){ return Min(a, b); } - ); - break; - } - - case spv::OpGroupNonUniformFMin: - { - using 
Type = SIMD::Float; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type::positive_inf(), - [](RValue<Type>a, RValue<Type>b){ return NMin(a, b); } - ); - break; - } - - case spv::OpGroupNonUniformSMax: - { - using Type = SIMD::Int; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(INT32_MIN), - [](RValue<Type>a, RValue<Type>b){ return Max(a, b); } - ); - break; - } - - case spv::OpGroupNonUniformUMax: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0), - [](RValue<Type>a, RValue<Type>b){ return Max(a, b); } - ); - break; - } - - case spv::OpGroupNonUniformFMax: - { - using Type = SIMD::Float; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type::negative_inf(), - [](RValue<Type>a, RValue<Type>b){ return NMax(a, b); } - ); - break; - } - - case spv::OpGroupNonUniformLogicalAnd: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(~0u), - [](RValue<Type>a, RValue<Type>b){ - SIMD::UInt zero = SIMD::UInt(0); - return CmpNEQ(a, zero) & CmpNEQ(b, zero); - } - ); - break; - } - - case spv::OpGroupNonUniformLogicalOr: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0), - [](RValue<Type>a, RValue<Type>b){ - SIMD::UInt zero = SIMD::UInt(0); - return CmpNEQ(a, zero) | CmpNEQ(b, zero); - } - ); - break; - } - - case spv::OpGroupNonUniformLogicalXor: - { - using Type = SIMD::UInt; - SpirvShader::GroupOps::BinaryOperation( - this, insn, state, dst, - Type(0), - [](RValue<Type>a, RValue<Type>b){ - SIMD::UInt zero = SIMD::UInt(0); - return CmpNEQ(a, zero) ^ CmpNEQ(b, zero); - } - ); - break; - } - - default: - UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str()); + UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode()).c_str()); } return EmitResult::Continue; }
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp index 5e7edcf..1ec6f89 100644 --- a/src/Pipeline/SpirvShaderImage.cpp +++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -27,26 +27,26 @@ { switch(format) { - case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT; - case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT; - case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT; - case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT; - case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT; - case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT; - case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM; - case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM; - case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT; - case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT; - case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT; - case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT; - case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT; - case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT; - case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT; - case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT; + case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT; + case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT; + case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT; + case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT; + case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT; + case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT; + case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM; + case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM; + case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT; + case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT; + case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT; + case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT; + case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT; + case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT; + case spv::ImageFormatRg32i: 
return VK_FORMAT_R32G32_SINT; + case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT; - default: - UNIMPLEMENTED("SPIR-V ImageFormat %u", format); - return VK_FORMAT_UNDEFINED; + default: + UNIMPLEMENTED("SPIR-V ImageFormat %u", format); + return VK_FORMAT_UNDEFINED; } } @@ -57,21 +57,21 @@ sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f)); - return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse() + return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse() } -} // anonymous namespace +} // anonymous namespace namespace sw { SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const { - return EmitImageSample({variant, Implicit}, insn, state); + return EmitImageSample({ variant, Implicit }, insn, state); } SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const { - ImageInstruction instruction = {variant, Gather}; + ImageInstruction instruction = { variant, Gather }; instruction.gatherComponent = !instruction.isDref() ? 
getObject(insn.word(5)).constantValue[0] : 0; return EmitImageSample(instruction, insn, state); @@ -85,19 +85,20 @@ if((imageOperands & spv::ImageOperandsLodMask) == imageOperands) { - return EmitImageSample({variant, Lod}, insn, state); + return EmitImageSample({ variant, Lod }, insn, state); } else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands) { - return EmitImageSample({variant, Grad}, insn, state); + return EmitImageSample({ variant, Grad }, insn, state); } - else UNIMPLEMENTED("Image Operands %x", imageOperands); + else + UNIMPLEMENTED("Image Operands %x", imageOperands); return EmitResult::Continue; } SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const { - return EmitImageSample({None, Fetch}, insn, state); + return EmitImageSample({ None, Fetch }, insn, state); } SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const @@ -109,18 +110,17 @@ auto &resultType = getType(resultTypeId); auto &result = state->createIntermediate(resultId, resultType.sizeInComponents); - auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor* + auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor* // If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor auto &sampledImage = getObject(sampledImageId); - auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ? - state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor; + auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ? 
state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor; auto coordinate = GenericValue(this, state, coordinateId); auto &coordinateType = getType(coordinate.type); - Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler* - Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture* + Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler* + Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture* // Above we assumed that if the SampledImage operand is not the result of an OpSampledImage, // it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch @@ -206,7 +206,7 @@ instruction.coordinates = coordinates; uint32_t i = 0; - for( ; i < coordinates; i++) + for(; i < coordinates; i++) { if(instruction.isProj()) { @@ -335,7 +335,7 @@ SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const { - return EmitImageSample({None, Query}, insn, state); + return EmitImageSample({ None, Query }, insn, state); } void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const @@ -359,23 +359,23 @@ switch(bindingLayout.descriptorType) { - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - { - extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]* - arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t - break; - } - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - { - extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]* - arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, 
arrayLayers)); // uint32_t - break; - } - default: - UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType)); + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + { + extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]* + arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t + break; + } + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + { + extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]* + arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t + break; + } + default: + UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType)); } auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0); @@ -421,13 +421,13 @@ Int mipLevels = 0; switch(bindingLayout.descriptorType) { - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t - break; - default: - UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType)); + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t + break; + default: + UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType)); } auto &dst = state->createIntermediate(resultId, 1); @@ -455,16 +455,16 @@ Int sampleCount = 0; switch(bindingLayout.descriptorType) { - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t - break; - case 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t - break; - default: - UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType)); + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t + break; + default: + UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType)); } auto &dst = state->createIntermediate(resultId, 1); @@ -473,7 +473,7 @@ return EmitResult::Continue; } -SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const +SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const { auto routine = state->routine; bool isArrayed = imageType.definition.word(5) != 0; @@ -495,16 +495,16 @@ } auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect - ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes) - : OFFSET(vk::StorageImageDescriptor, rowPitchBytes)))); + ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes) + : OFFSET(vk::StorageImageDescriptor, rowPitchBytes)))); auto slicePitch = SIMD::Int( - *Pointer<Int>(descriptor + (useStencilAspect - ? 
OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes) - : OFFSET(vk::StorageImageDescriptor, slicePitchBytes)))); + *Pointer<Int>(descriptor + (useStencilAspect + ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes) + : OFFSET(vk::StorageImageDescriptor, slicePitchBytes)))); auto samplePitch = SIMD::Int( - *Pointer<Int>(descriptor + (useStencilAspect - ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes) - : OFFSET(vk::StorageImageDescriptor, samplePitchBytes)))); + *Pointer<Int>(descriptor + (useStencilAspect + ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes) + : OFFSET(vk::StorageImageDescriptor, samplePitchBytes)))); ptr += u * SIMD::Int(texelSize); if(dims > 1) @@ -568,13 +568,13 @@ // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from // the renderpass data instead. In all other cases, we can use the format in the instruction. auto vkFormat = (dim == spv::DimSubpassData) - ? inputAttachmentFormats[d.InputAttachmentIndex] - : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8))); + ? inputAttachmentFormats[d.InputAttachmentIndex] + : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8))); // Depth+Stencil image attachments select aspect based on the Sampled Type of the // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect. auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT && - getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt); + getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt); if(useStencilAspect) { @@ -584,8 +584,8 @@ auto pointer = state->getPointer(imageId); Pointer<Byte> binding = pointer.base; Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect - ? OFFSET(vk::StorageImageDescriptor, stencilPtr) - : OFFSET(vk::StorageImageDescriptor, ptr))); + ? 
OFFSET(vk::StorageImageDescriptor, stencilPtr) + : OFFSET(vk::StorageImageDescriptor, ptr))); auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes)); @@ -604,7 +604,7 @@ // Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch // of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits. // TODO: specialize for small formats? - for(auto i = 0; i < (texelSize + 3)/4; i++) + for(auto i = 0; i < (texelSize + 3) / 4; i++) { packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4)); texelPtr += sizeof(float); @@ -615,214 +615,214 @@ // - Any format supported as a color or depth/stencil attachment, for input attachments switch(vkFormat) { - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_R32G32B32A32_UINT: - dst.move(0, packed[0]); - dst.move(1, packed[1]); - dst.move(2, packed[2]); - dst.move(3, packed[3]); - break; - case VK_FORMAT_R32_SINT: - case VK_FORMAT_R32_UINT: - dst.move(0, packed[0]); - // Fill remaining channels with 0,0,1 (of the correct type) - dst.move(1, SIMD::Int(0)); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(1)); - break; - case VK_FORMAT_R32_SFLOAT: - case VK_FORMAT_D32_SFLOAT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - dst.move(0, packed[0]); - // Fill remaining channels with 0,0,1 (of the correct type) - dst.move(1, SIMD::Float(0)); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_D16_UNORM: - dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f)); - dst.move(1, SIMD::Float(0)); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_R16G16B16A16_SINT: - dst.move(0, (packed[0] << 16) >> 16); - dst.move(1, (packed[0]) >> 16); - dst.move(2, (packed[1] << 16) >> 16); - dst.move(3, (packed[1]) >> 16); - break; - case 
VK_FORMAT_R16G16B16A16_UINT: - dst.move(0, packed[0] & SIMD::Int(0xffff)); - dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff)); - dst.move(2, packed[1] & SIMD::Int(0xffff)); - dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff)); - break; - case VK_FORMAT_R16G16B16A16_SFLOAT: - dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); - dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16)); - dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF))); - dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16)); - break; - case VK_FORMAT_R8G8B8A8_SNORM: - dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); - break; - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - break; - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_A8B8G8R8_SRGB_PACK32: - dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); - dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & 
SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); - dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); - dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - break; - case VK_FORMAT_B8G8R8A8_UNORM: - dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - break; - case VK_FORMAT_B8G8R8A8_SRGB: - dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); - dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); - dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); - dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - break; - case VK_FORMAT_R8G8B8A8_UINT: - case VK_FORMAT_A8B8G8R8_UINT_PACK32: - dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF))); - dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF))); - dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF))); - dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF))); - break; - case VK_FORMAT_R8G8B8A8_SINT: - case VK_FORMAT_A8B8G8R8_SINT_PACK32: - dst.move(0, (packed[0] << 24) >> 24); - dst.move(1, (packed[0] << 16) >> 24); - dst.move(2, (packed[0] << 8) >> 24); - dst.move(3, (packed[0]) >> 24); - break; - case VK_FORMAT_R8_UNORM: - dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(1, SIMD::Float(0)); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_R8_UINT: - case VK_FORMAT_S8_UINT: - dst.move(0, 
(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF))); - dst.move(1, SIMD::UInt(0)); - dst.move(2, SIMD::UInt(0)); - dst.move(3, SIMD::UInt(1)); - break; - case VK_FORMAT_R8_SINT: - dst.move(0, (packed[0] << 24) >> 24); - dst.move(1, SIMD::Int(0)); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(1)); - break; - case VK_FORMAT_R8G8_UNORM: - dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_R8G8_UINT: - dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF))); - dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF))); - dst.move(2, SIMD::UInt(0)); - dst.move(3, SIMD::UInt(1)); - break; - case VK_FORMAT_R8G8_SINT: - dst.move(0, (packed[0] << 24) >> 24); - dst.move(1, (packed[0] << 16) >> 24); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(1)); - break; - case VK_FORMAT_R16_SFLOAT: - dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); - dst.move(1, SIMD::Float(0)); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_R16_UINT: - dst.move(0, packed[0] & SIMD::Int(0xffff)); - dst.move(1, SIMD::UInt(0)); - dst.move(2, SIMD::UInt(0)); - dst.move(3, SIMD::UInt(1)); - break; - case VK_FORMAT_R16_SINT: - dst.move(0, (packed[0] << 16) >> 16); - dst.move(1, SIMD::Int(0)); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(1)); - break; - case VK_FORMAT_R16G16_SFLOAT: - dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); - dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16)); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_R16G16_UINT: - dst.move(0, packed[0] & SIMD::Int(0xffff)); - dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff)); - dst.move(2, SIMD::UInt(0)); - dst.move(3, 
SIMD::UInt(1)); - break; - case VK_FORMAT_R16G16_SINT: - dst.move(0, (packed[0] << 16) >> 16); - dst.move(1, (packed[0]) >> 16); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(1)); - break; - case VK_FORMAT_R32G32_SINT: - case VK_FORMAT_R32G32_UINT: - dst.move(0, packed[0]); - dst.move(1, packed[1]); - dst.move(2, SIMD::Int(0)); - dst.move(3, SIMD::Int(1)); - break; - case VK_FORMAT_R32G32_SFLOAT: - dst.move(0, packed[0]); - dst.move(1, packed[1]); - dst.move(2, SIMD::Float(0)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_A2B10G10R10_UINT_PACK32: - dst.move(0, (packed[0]) & SIMD::Int(0x3FF)); - dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF)); - dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF)); - dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3)); - break; - case VK_FORMAT_A2B10G10R10_UNORM_PACK32: - dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF)); - dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF)); - dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF)); - dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3)); - break; - case VK_FORMAT_R5G6B5_UNORM_PACK16: - dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); - dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F)); - dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); - dst.move(3, SIMD::Float(1)); - break; - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); - dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); - dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); - dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1))); - break; - default: - UNIMPLEMENTED("VkFormat %d", int(vkFormat)); - break; 
+ case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SINT: + case VK_FORMAT_R32G32B32A32_UINT: + dst.move(0, packed[0]); + dst.move(1, packed[1]); + dst.move(2, packed[2]); + dst.move(3, packed[3]); + break; + case VK_FORMAT_R32_SINT: + case VK_FORMAT_R32_UINT: + dst.move(0, packed[0]); + // Fill remaining channels with 0,0,1 (of the correct type) + dst.move(1, SIMD::Int(0)); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(1)); + break; + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_D32_SFLOAT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + dst.move(0, packed[0]); + // Fill remaining channels with 0,0,1 (of the correct type) + dst.move(1, SIMD::Float(0)); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_D16_UNORM: + dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f)); + dst.move(1, SIMD::Float(0)); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_R16G16B16A16_SINT: + dst.move(0, (packed[0] << 16) >> 16); + dst.move(1, (packed[0]) >> 16); + dst.move(2, (packed[1] << 16) >> 16); + dst.move(3, (packed[1]) >> 16); + break; + case VK_FORMAT_R16G16B16A16_UINT: + dst.move(0, packed[0] & SIMD::Int(0xffff)); + dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff)); + dst.move(2, packed[1] & SIMD::Int(0xffff)); + dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff)); + break; + case VK_FORMAT_R16G16B16A16_SFLOAT: + dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); + dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16)); + dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF))); + dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16)); + break; + case VK_FORMAT_R8G8B8A8_SNORM: + dst.move(0, Min(Max(SIMD::Float(((packed[0] << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + 
dst.move(1, Min(Max(SIMD::Float(((packed[0] << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + dst.move(2, Min(Max(SIMD::Float(((packed[0] << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f))); + break; + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(2, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + break; + case VK_FORMAT_R8G8B8A8_SRGB: + case VK_FORMAT_A8B8G8R8_SRGB_PACK32: + dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); + dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); + dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); + dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + break; + case VK_FORMAT_B8G8R8A8_UNORM: + dst.move(0, SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + break; + case VK_FORMAT_B8G8R8A8_SRGB: + dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); + 
dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); + dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f))); + dst.move(3, SIMD::Float(((packed[0] >> 24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + break; + case VK_FORMAT_R8G8B8A8_UINT: + case VK_FORMAT_A8B8G8R8_UINT_PACK32: + dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF))); + dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF))); + dst.move(2, ((As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF))); + dst.move(3, ((As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF))); + break; + case VK_FORMAT_R8G8B8A8_SINT: + case VK_FORMAT_A8B8G8R8_SINT_PACK32: + dst.move(0, (packed[0] << 24) >> 24); + dst.move(1, (packed[0] << 16) >> 24); + dst.move(2, (packed[0] << 8) >> 24); + dst.move(3, (packed[0]) >> 24); + break; + case VK_FORMAT_R8_UNORM: + dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(1, SIMD::Float(0)); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_R8_UINT: + case VK_FORMAT_S8_UINT: + dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF))); + dst.move(1, SIMD::UInt(0)); + dst.move(2, SIMD::UInt(0)); + dst.move(3, SIMD::UInt(1)); + break; + case VK_FORMAT_R8_SINT: + dst.move(0, (packed[0] << 24) >> 24); + dst.move(1, SIMD::Int(0)); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(1)); + break; + case VK_FORMAT_R8G8_UNORM: + dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(1, SIMD::Float(((packed[0] >> 8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_R8G8_UINT: + dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF))); + dst.move(1, ((As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF))); + dst.move(2, SIMD::UInt(0)); + dst.move(3, SIMD::UInt(1)); + 
break; + case VK_FORMAT_R8G8_SINT: + dst.move(0, (packed[0] << 24) >> 24); + dst.move(1, (packed[0] << 16) >> 24); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(1)); + break; + case VK_FORMAT_R16_SFLOAT: + dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); + dst.move(1, SIMD::Float(0)); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_R16_UINT: + dst.move(0, packed[0] & SIMD::Int(0xffff)); + dst.move(1, SIMD::UInt(0)); + dst.move(2, SIMD::UInt(0)); + dst.move(3, SIMD::UInt(1)); + break; + case VK_FORMAT_R16_SINT: + dst.move(0, (packed[0] << 16) >> 16); + dst.move(1, SIMD::Int(0)); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(1)); + break; + case VK_FORMAT_R16G16_SFLOAT: + dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF))); + dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16)); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_R16G16_UINT: + dst.move(0, packed[0] & SIMD::Int(0xffff)); + dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff)); + dst.move(2, SIMD::UInt(0)); + dst.move(3, SIMD::UInt(1)); + break; + case VK_FORMAT_R16G16_SINT: + dst.move(0, (packed[0] << 16) >> 16); + dst.move(1, (packed[0]) >> 16); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(1)); + break; + case VK_FORMAT_R32G32_SINT: + case VK_FORMAT_R32G32_UINT: + dst.move(0, packed[0]); + dst.move(1, packed[1]); + dst.move(2, SIMD::Int(0)); + dst.move(3, SIMD::Int(1)); + break; + case VK_FORMAT_R32G32_SFLOAT: + dst.move(0, packed[0]); + dst.move(1, packed[1]); + dst.move(2, SIMD::Float(0)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_A2B10G10R10_UINT_PACK32: + dst.move(0, (packed[0]) & SIMD::Int(0x3FF)); + dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF)); + dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF)); + dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3)); + break; + case 
VK_FORMAT_A2B10G10R10_UNORM_PACK32: + dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF)); + dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF)); + dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF)); + dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3)); + break; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); + dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F)); + dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); + dst.move(3, SIMD::Float(1)); + break; + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); + dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); + dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F)); + dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1))); + break; + default: + UNIMPLEMENTED("VkFormat %d", int(vkFormat)); + break; } return EmitResult::Continue; @@ -852,103 +852,106 @@ auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8)); switch(format) { - case spv::ImageFormatRgba32f: - case spv::ImageFormatRgba32i: - case spv::ImageFormatRgba32ui: - texelSize = 16; - packed[0] = texel.Int(0); - packed[1] = texel.Int(1); - packed[2] = texel.Int(2); - packed[3] = texel.Int(3); - numPackedElements = 4; - break; - case spv::ImageFormatR32f: - case spv::ImageFormatR32i: - case spv::ImageFormatR32ui: - texelSize = 4; - packed[0] = texel.Int(0); - numPackedElements = 1; - break; - case spv::ImageFormatRgba8: - texelSize = 4; - packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) | - ((SIMD::UInt(Round(Min(Max(texel.Float(1), 
SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) | - ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) | - ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24); - numPackedElements = 1; - break; - case spv::ImageFormatRgba8Snorm: - texelSize = 4; - packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) | - ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) << 8) | - ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) << 16) | - ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & - SIMD::Int(0xFF)) << 24); - numPackedElements = 1; - break; - case spv::ImageFormatRgba8i: - case spv::ImageFormatRgba8ui: - texelSize = 4; - packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) | - (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) | - (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) | - (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24); - numPackedElements = 1; - break; - case spv::ImageFormatRgba16f: - texelSize = 8; - packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true); - packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true); - numPackedElements = 2; - break; - case spv::ImageFormatRgba16i: - case spv::ImageFormatRgba16ui: - texelSize = 8; - packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16); - packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16); - numPackedElements = 2; - break; - case spv::ImageFormatRg32f: - case 
spv::ImageFormatRg32i: - case spv::ImageFormatRg32ui: - texelSize = 8; - packed[0] = texel.Int(0); - packed[1] = texel.Int(1); - numPackedElements = 2; - break; + case spv::ImageFormatRgba32f: + case spv::ImageFormatRgba32i: + case spv::ImageFormatRgba32ui: + texelSize = 16; + packed[0] = texel.Int(0); + packed[1] = texel.Int(1); + packed[2] = texel.Int(2); + packed[3] = texel.Int(3); + numPackedElements = 4; + break; + case spv::ImageFormatR32f: + case spv::ImageFormatR32i: + case spv::ImageFormatR32ui: + texelSize = 4; + packed[0] = texel.Int(0); + numPackedElements = 1; + break; + case spv::ImageFormatRgba8: + texelSize = 4; + packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) | + ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) | + ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) | + ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24); + numPackedElements = 1; + break; + case spv::ImageFormatRgba8Snorm: + texelSize = 4; + packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) | + ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) + << 8) | + ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) + << 16) | + ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) & + SIMD::Int(0xFF)) + << 24); + numPackedElements = 1; + break; + case spv::ImageFormatRgba8i: + case spv::ImageFormatRgba8ui: + texelSize = 4; + packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) | + (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) | + 
(SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) | + (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24); + numPackedElements = 1; + break; + case spv::ImageFormatRgba16f: + texelSize = 8; + packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true); + packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true); + numPackedElements = 2; + break; + case spv::ImageFormatRgba16i: + case spv::ImageFormatRgba16ui: + texelSize = 8; + packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16); + packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16); + numPackedElements = 2; + break; + case spv::ImageFormatRg32f: + case spv::ImageFormatRg32i: + case spv::ImageFormatRg32ui: + texelSize = 8; + packed[0] = texel.Int(0); + packed[1] = texel.Int(1); + numPackedElements = 2; + break; - case spv::ImageFormatRg16f: - case spv::ImageFormatR11fG11fB10f: - case spv::ImageFormatR16f: - case spv::ImageFormatRgba16: - case spv::ImageFormatRgb10A2: - case spv::ImageFormatRg16: - case spv::ImageFormatRg8: - case spv::ImageFormatR16: - case spv::ImageFormatR8: - case spv::ImageFormatRgba16Snorm: - case spv::ImageFormatRg16Snorm: - case spv::ImageFormatRg8Snorm: - case spv::ImageFormatR16Snorm: - case spv::ImageFormatR8Snorm: - case spv::ImageFormatRg16i: - case spv::ImageFormatRg8i: - case spv::ImageFormatR16i: - case spv::ImageFormatR8i: - case spv::ImageFormatRgb10a2ui: - case spv::ImageFormatRg16ui: - case spv::ImageFormatRg8ui: - case spv::ImageFormatR16ui: - case spv::ImageFormatR8ui: - UNIMPLEMENTED("spv::ImageFormat %d", int(format)); - break; + case spv::ImageFormatRg16f: + case spv::ImageFormatR11fG11fB10f: + case spv::ImageFormatR16f: + case spv::ImageFormatRgba16: + case spv::ImageFormatRgb10A2: + case spv::ImageFormatRg16: + case spv::ImageFormatRg8: + case spv::ImageFormatR16: + case 
spv::ImageFormatR8: + case spv::ImageFormatRgba16Snorm: + case spv::ImageFormatRg16Snorm: + case spv::ImageFormatRg8Snorm: + case spv::ImageFormatR16Snorm: + case spv::ImageFormatR8Snorm: + case spv::ImageFormatRg16i: + case spv::ImageFormatRg8i: + case spv::ImageFormatR16i: + case spv::ImageFormatR8i: + case spv::ImageFormatRgb10a2ui: + case spv::ImageFormatRg16ui: + case spv::ImageFormatRg8ui: + case spv::ImageFormatR16ui: + case spv::ImageFormatR8ui: + UNIMPLEMENTED("spv::ImageFormat %d", int(format)); + break; - default: - UNREACHABLE("spv::ImageFormat %d", int(format)); - break; + default: + UNREACHABLE("spv::ImageFormat %d", int(format)); + break; } auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp index c172d3c..8df565f 100644 --- a/src/Pipeline/SpirvShaderMemory.cpp +++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -57,8 +57,7 @@ auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents); auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass); - VisitMemoryObject(pointerId, [&](const MemoryElement& el) - { + VisitMemoryObject(pointerId, [&](const MemoryElement &el) { auto p = ptr + el.offset; if(interleavedByLane) { p = InterleaveByLane(p); } // TODO: Interleave once, then add offset? dst.move(el.index, p.Load<SIMD::Float>(robustness, state->activeLaneMask(), atomic, memoryOrder)); @@ -101,8 +100,7 @@ { // Constant source data. const uint32_t *src = object.constantValue.get(); - VisitMemoryObject(pointerId, [&](const MemoryElement& el) - { + VisitMemoryObject(pointerId, [&](const MemoryElement &el) { auto p = ptr + el.offset; if(interleavedByLane) { p = InterleaveByLane(p); } p.Store(SIMD::Int(src[el.index]), robustness, mask, atomic, memoryOrder); @@ -112,8 +110,7 @@ { // Intermediate source data. auto &src = state->getIntermediate(objectId); - VisitMemoryObject(pointerId, [&](const MemoryElement& el) - { + VisitMemoryObject(pointerId, [&](const MemoryElement &el) { auto p = ptr + el.offset; if(interleavedByLane) { p = InterleaveByLane(p); } p.Store(src.Float(el.index), robustness, mask, atomic, memoryOrder); @@ -132,95 +129,95 @@ switch(objectTy.storageClass) { - case spv::StorageClassOutput: - case spv::StorageClassPrivate: - case spv::StorageClassFunction: - { - ASSERT(objectTy.opcode() == spv::OpTypePointer); - auto base = &routine->getVariable(resultId)[0]; - auto elementTy = getType(objectTy.element); - auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width; - state->createPointer(resultId, SIMD::Pointer(base, size)); - break; - } - case spv::StorageClassWorkgroup: - { - ASSERT(objectTy.opcode() == spv::OpTypePointer); - auto base = &routine->workgroupMemory[0]; - auto size = workgroupMemory.size(); - state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId))); - 
break; - } - case spv::StorageClassInput: - { - if(object.kind == Object::Kind::InterfaceVariable) + case spv::StorageClassOutput: + case spv::StorageClassPrivate: + case spv::StorageClassFunction: { - auto &dst = routine->getVariable(resultId); - int offset = 0; - VisitInterface(resultId, - [&](Decorations const &d, AttribType type) { - auto scalarSlot = d.Location << 2 | d.Component; - dst[offset++] = routine->inputs[scalarSlot]; - }); + ASSERT(objectTy.opcode() == spv::OpTypePointer); + auto base = &routine->getVariable(resultId)[0]; + auto elementTy = getType(objectTy.element); + auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width; + state->createPointer(resultId, SIMD::Pointer(base, size)); + break; } - ASSERT(objectTy.opcode() == spv::OpTypePointer); - auto base = &routine->getVariable(resultId)[0]; - auto elementTy = getType(objectTy.element); - auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width; - state->createPointer(resultId, SIMD::Pointer(base, size)); - break; - } - case spv::StorageClassUniformConstant: - { - const auto &d = descriptorDecorations.at(resultId); - ASSERT(d.DescriptorSet >= 0); - ASSERT(d.Binding >= 0); + case spv::StorageClassWorkgroup: + { + ASSERT(objectTy.opcode() == spv::OpTypePointer); + auto base = &routine->workgroupMemory[0]; + auto size = workgroupMemory.size(); + state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId))); + break; + } + case spv::StorageClassInput: + { + if(object.kind == Object::Kind::InterfaceVariable) + { + auto &dst = routine->getVariable(resultId); + int offset = 0; + VisitInterface(resultId, + [&](Decorations const &d, AttribType type) { + auto scalarSlot = d.Location << 2 | d.Component; + dst[offset++] = routine->inputs[scalarSlot]; + }); + } + ASSERT(objectTy.opcode() == spv::OpTypePointer); + auto base = &routine->getVariable(resultId)[0]; + auto elementTy = getType(objectTy.element); + 
auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width; + state->createPointer(resultId, SIMD::Pointer(base, size)); + break; + } + case spv::StorageClassUniformConstant: + { + const auto &d = descriptorDecorations.at(resultId); + ASSERT(d.DescriptorSet >= 0); + ASSERT(d.Binding >= 0); - uint32_t arrayIndex = 0; // TODO(b/129523279) - auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet); - if(setLayout->hasBinding(d.Binding)) - { - uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex)); - Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet* - Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor* - auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write. - state->createPointer(resultId, SIMD::Pointer(binding, size)); + uint32_t arrayIndex = 0; // TODO(b/129523279) + auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet); + if(setLayout->hasBinding(d.Binding)) + { + uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex)); + Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet* + Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor* + auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write. + state->createPointer(resultId, SIMD::Pointer(binding, size)); + } + else + { + // TODO: Error if the variable with the non-existant binding is + // used? Or perhaps strip these unused variable declarations as + // a preprocess on the SPIR-V? + } + break; } - else + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: { - // TODO: Error if the variable with the non-existant binding is - // used? Or perhaps strip these unused variable declarations as - // a preprocess on the SPIR-V? 
+ const auto &d = descriptorDecorations.at(resultId); + ASSERT(d.DescriptorSet >= 0); + auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write. + // Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number + // of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this + // is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it. + if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS) + { + state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size)); + } + else + { + state->createPointer(resultId, SIMD::Pointer(nullptr, 0)); + } + break; } - break; - } - case spv::StorageClassUniform: - case spv::StorageClassStorageBuffer: - { - const auto &d = descriptorDecorations.at(resultId); - ASSERT(d.DescriptorSet >= 0); - auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write. - // Note: the module may contain descriptor set references that are not suitable for this implementation -- using a set index higher than the number - // of descriptor set binding points we support. As long as the selected entrypoint doesn't actually touch the out of range binding points, this - // is valid. In this case make the value nullptr to make it easier to diagnose an attempt to dereference it. 
- if(d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS) + case spv::StorageClassPushConstant: { - state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size)); + state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE)); + break; } - else - { - state->createPointer(resultId, SIMD::Pointer(nullptr, 0)); - } - break; - } - case spv::StorageClassPushConstant: - { - state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE)); - break; - } - default: - UNREACHABLE("Storage class %d", objectTy.storageClass); - break; + default: + UNREACHABLE("Storage class %d", objectTy.storageClass); + break; } if(insn.wordCount() > 4) @@ -232,24 +229,23 @@ } switch(objectTy.storageClass) { - case spv::StorageClassOutput: - case spv::StorageClassPrivate: - case spv::StorageClassFunction: - { - bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass); - auto ptr = GetPointerToData(resultId, 0, state); - GenericValue initialValue(this, state, initializerId); - VisitMemoryObject(resultId, [&](const MemoryElement& el) + case spv::StorageClassOutput: + case spv::StorageClassPrivate: + case spv::StorageClassFunction: { - auto p = ptr + el.offset; - if(interleavedByLane) { p = InterleaveByLane(p); } - auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds. 
- p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask()); - }); - break; - } - default: - ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass)); + bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass); + auto ptr = GetPointerToData(resultId, 0, state); + GenericValue initialValue(this, state, initializerId); + VisitMemoryObject(resultId, [&](const MemoryElement &el) { + auto p = ptr + el.offset; + if(interleavedByLane) { p = InterleaveByLane(p); } + auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds. + p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask()); + }); + break; + } + default: + ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass)); } } @@ -271,10 +267,9 @@ std::unordered_map<uint32_t, uint32_t> srcOffsets; - VisitMemoryObject(srcPtrId, [&](const MemoryElement& el) { srcOffsets[el.index] = el.offset; }); + VisitMemoryObject(srcPtrId, [&](const MemoryElement &el) { srcOffsets[el.index] = el.offset; }); - VisitMemoryObject(dstPtrId, [&](const MemoryElement& el) - { + VisitMemoryObject(dstPtrId, [&](const MemoryElement &el) { auto it = srcOffsets.find(el.index); ASSERT(it != srcOffsets.end()); auto srcOffset = it->second; @@ -303,7 +298,7 @@ return EmitResult::Continue; } -void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, const MemoryVisitor& f) const +void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t &index, uint32_t offset, const MemoryVisitor &f) const { ApplyDecorationsForId(&d, id); auto const &type = getType(id); @@ -316,60 +311,60 @@ switch(type.opcode()) { - case spv::OpTypePointer: - VisitMemoryObjectInner(type.definition.word(3), 
d, index, offset, f); - break; - case spv::OpTypeInt: - case spv::OpTypeFloat: - case spv::OpTypeRuntimeArray: - f(MemoryElement{index++, offset, type}); - break; - case spv::OpTypeVector: - { - auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float)); - for(auto i = 0u; i < type.definition.word(3); i++) + case spv::OpTypePointer: + VisitMemoryObjectInner(type.definition.word(3), d, index, offset, f); + break; + case spv::OpTypeInt: + case spv::OpTypeFloat: + case spv::OpTypeRuntimeArray: + f(MemoryElement{ index++, offset, type }); + break; + case spv::OpTypeVector: { - VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f); + auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float)); + for(auto i = 0u; i < type.definition.word(3); i++) + { + VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f); + } + break; } - break; - } - case spv::OpTypeMatrix: - { - auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride; - d.InsideMatrix = true; - for(auto i = 0u; i < type.definition.word(3); i++) + case spv::OpTypeMatrix: { - ASSERT(d.HasMatrixStride); - VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f); + auto columnStride = (d.HasRowMajor && d.RowMajor) ? 
static_cast<int32_t>(sizeof(float)) : d.MatrixStride; + d.InsideMatrix = true; + for(auto i = 0u; i < type.definition.word(3); i++) + { + ASSERT(d.HasMatrixStride); + VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f); + } + break; } - break; - } - case spv::OpTypeStruct: - for(auto i = 0u; i < type.definition.wordCount() - 2; i++) + case spv::OpTypeStruct: + for(auto i = 0u; i < type.definition.wordCount() - 2; i++) + { + ApplyDecorationsForIdMember(&d, id, i); + VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f); + } + break; + case spv::OpTypeArray: { - ApplyDecorationsForIdMember(&d, id, i); - VisitMemoryObjectInner(type.definition.word(i + 2), d, index, offset, f); + auto arraySize = GetConstScalarInt(type.definition.word(3)); + for(auto i = 0u; i < arraySize; i++) + { + ASSERT(d.HasArrayStride); + VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f); + } + break; } - break; - case spv::OpTypeArray: - { - auto arraySize = GetConstScalarInt(type.definition.word(3)); - for(auto i = 0u; i < arraySize; i++) - { - ASSERT(d.HasArrayStride); - VisitMemoryObjectInner(type.definition.word(2), d, index, offset + i * d.ArrayStride, f); - } - break; - } - default: - UNREACHABLE("%s", OpcodeName(type.opcode()).c_str()); + default: + UNREACHABLE("%s", OpcodeName(type.opcode()).c_str()); } } -void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, const MemoryVisitor& f) const +void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, const MemoryVisitor &f) const { auto typeId = getObject(id).type; - auto const & type = getType(typeId); + auto const &type = getType(typeId); if(IsExplicitLayout(type.storageClass)) { Decorations d{}; @@ -384,7 +379,7 @@ for(auto index = 0u; index < elType.sizeInComponents; index++) { auto offset = static_cast<uint32_t>(index * sizeof(float)); - f({index, offset, elType}); + f({ index, offset, elType }); } } } @@ -411,15 +406,15 
@@ ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding)); int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex)); - Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor* - Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void* + Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor* + Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void* Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes)); if(setLayout->isBindingDynamic(d.Binding)) { uint32_t dynamicBindingIndex = - routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) + - setLayout->getDynamicDescriptorOffset(d.Binding) + - arrayIndex; + routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) + + setLayout->getDynamicDescriptorOffset(d.Binding) + + arrayIndex; Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex]; Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize)); return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset)); @@ -439,23 +434,22 @@ std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics) { auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>( - spv::MemorySemanticsAcquireMask | - spv::MemorySemanticsReleaseMask | - spv::MemorySemanticsAcquireReleaseMask | - spv::MemorySemanticsSequentiallyConsistentMask - ); + spv::MemorySemanticsAcquireMask | + spv::MemorySemanticsReleaseMask | + spv::MemorySemanticsAcquireReleaseMask | + spv::MemorySemanticsSequentiallyConsistentMask); switch(control) { - case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed; - case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire; - case spv::MemorySemanticsReleaseMask: return 
std::memory_order_release; - case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel; - case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease" - default: - // "it is invalid for more than one of these four bits to be set: - // Acquire, Release, AcquireRelease, or SequentiallyConsistent." - UNREACHABLE("MemorySemanticsMask: %x", int(control)); - return std::memory_order_acq_rel; + case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed; + case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire; + case spv::MemorySemanticsReleaseMask: return std::memory_order_release; + case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel; + case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease" + default: + // "it is invalid for more than one of these four bits to be set: + // Acquire, Release, AcquireRelease, or SequentiallyConsistent." 
+ UNREACHABLE("MemorySemanticsMask: %x", int(control)); + return std::memory_order_acq_rel; } } @@ -463,12 +457,12 @@ { switch(storageClass) { - case spv::StorageClassUniform: - case spv::StorageClassStorageBuffer: - case spv::StorageClassImage: - return false; - default: - return true; + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: + case spv::StorageClassImage: + return false; + default: + return true; } } @@ -476,12 +470,12 @@ { switch(storageClass) { - case spv::StorageClassUniform: - case spv::StorageClassStorageBuffer: - case spv::StorageClassPushConstant: - return true; - default: - return false; + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: + case spv::StorageClassPushConstant: + return true; + default: + return false; } } @@ -499,14 +493,14 @@ { switch(storageClass) { - case spv::StorageClassUniform: - case spv::StorageClassStorageBuffer: - case spv::StorageClassPushConstant: - case spv::StorageClassWorkgroup: - case spv::StorageClassImage: - return false; - default: - return true; + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: + case spv::StorageClassPushConstant: + case spv::StorageClassWorkgroup: + case spv::StorageClassImage: + return false; + default: + return true; } }
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp index 1d9694a..45deba6 100644 --- a/src/Pipeline/SpirvShaderSampling.cpp +++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -14,14 +14,14 @@ #include "SpirvShader.hpp" -#include "SamplerCore.hpp" // TODO: Figure out what's needed. +#include "SamplerCore.hpp" // TODO: Figure out what's needed. +#include "Device/Config.hpp" #include "System/Math.hpp" #include "Vulkan/VkDebug.hpp" #include "Vulkan/VkDescriptorSetLayout.hpp" #include "Vulkan/VkDevice.hpp" #include "Vulkan/VkImageView.hpp" #include "Vulkan/VkSampler.hpp" -#include "Device/Config.hpp" #include <spirv/unified1/spirv.hpp> @@ -36,22 +36,22 @@ const auto samplerId = sampler ? sampler->id : 0; ASSERT(imageDescriptor->imageViewId != 0 && (samplerId != 0 || instruction.samplerMethod == Fetch)); - vk::Device::SamplingRoutineCache::Key key = {inst, imageDescriptor->imageViewId, samplerId}; + vk::Device::SamplingRoutineCache::Key key = { inst, imageDescriptor->imageViewId, samplerId }; ASSERT(imageDescriptor->device); if(auto routine = imageDescriptor->device->findInConstCache(key)) { - return (ImageSampler*)(routine->getEntry()); + return (ImageSampler *)(routine->getEntry()); } std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex()); - vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache(); + vk::Device::SamplingRoutineCache *cache = imageDescriptor->device->getSamplingRoutineCache(); auto routine = cache->query(key); if(routine) { - return (ImageSampler*)(routine->getEntry()); + return (ImageSampler *)(routine->getEntry()); } auto type = imageDescriptor->type; @@ -69,9 +69,9 @@ samplerState.swizzle = imageDescriptor->swizzle; samplerState.gatherComponent = instruction.gatherComponent; samplerState.highPrecisionFiltering = false; - samplerState.largeTexture = (imageDescriptor->extent.width > SHRT_MAX) || + samplerState.largeTexture = (imageDescriptor->extent.width > SHRT_MAX) || (imageDescriptor->extent.height > SHRT_MAX) || - (imageDescriptor->extent.depth > SHRT_MAX); + (imageDescriptor->extent.depth > SHRT_MAX); if(sampler) { @@ -100,7 +100,7 @@ routine = 
emitSamplerRoutine(instruction, samplerState); cache->add(key, routine); - return (ImageSampler*)(routine->getEntry()); + return (ImageSampler *)(routine->getEntry()); } std::shared_ptr<rr::Routine> SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState) @@ -114,17 +114,17 @@ Pointer<SIMD::Float> out = function.Arg<3>(); Pointer<Byte> constants = function.Arg<4>(); - SIMD::Float uvw[4] = {0, 0, 0, 0}; + SIMD::Float uvw[4] = { 0, 0, 0, 0 }; SIMD::Float q = 0; SIMD::Float lodOrBias = 0; // Explicit level-of-detail, or bias added to the implicit level-of-detail (depending on samplerMethod). - Vector4f dsx = {0, 0, 0, 0}; - Vector4f dsy = {0, 0, 0, 0}; - Vector4f offset = {0, 0, 0, 0}; + Vector4f dsx = { 0, 0, 0, 0 }; + Vector4f dsy = { 0, 0, 0, 0 }; + Vector4f offset = { 0, 0, 0, 0 }; SIMD::Int sampleId = 0; SamplerFunction samplerFunction = instruction.getSamplerFunction(); uint32_t i = 0; - for( ; i < instruction.coordinates; i++) + for(; i < instruction.coordinates; i++) { uvw[i] = in[i]; } @@ -231,28 +231,28 @@ { switch(sampler->magFilter) { - case VK_FILTER_NEAREST: - switch(sampler->minFilter) - { - case VK_FILTER_NEAREST: return FILTER_POINT; - case VK_FILTER_LINEAR: return FILTER_MIN_LINEAR_MAG_POINT; + case VK_FILTER_NEAREST: + switch(sampler->minFilter) + { + case VK_FILTER_NEAREST: return FILTER_POINT; + case VK_FILTER_LINEAR: return FILTER_MIN_LINEAR_MAG_POINT; + default: + UNIMPLEMENTED("minFilter %d", sampler->minFilter); + return FILTER_POINT; + } + break; + case VK_FILTER_LINEAR: + switch(sampler->minFilter) + { + case VK_FILTER_NEAREST: return FILTER_MIN_POINT_MAG_LINEAR; + case VK_FILTER_LINEAR: return FILTER_LINEAR; + default: + UNIMPLEMENTED("minFilter %d", sampler->minFilter); + return FILTER_POINT; + } + break; default: - UNIMPLEMENTED("minFilter %d", sampler->minFilter); - return FILTER_POINT; - } - break; - case VK_FILTER_LINEAR: - switch(sampler->minFilter) - { - case VK_FILTER_NEAREST: return 
FILTER_MIN_POINT_MAG_LINEAR; - case VK_FILTER_LINEAR: return FILTER_LINEAR; - default: - UNIMPLEMENTED("minFilter %d", sampler->minFilter); - return FILTER_POINT; - } - break; - default: - break; + break; } UNIMPLEMENTED("magFilter %d", sampler->magFilter); @@ -273,11 +273,11 @@ switch(sampler->mipmapMode) { - case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT; - case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MIPMAP_LINEAR; - default: - UNIMPLEMENTED("mipmapMode %d", sampler->mipmapMode); - return MIPMAP_POINT; + case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT; + case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MIPMAP_LINEAR; + default: + UNIMPLEMENTED("mipmapMode %d", sampler->mipmapMode); + return MIPMAP_POINT; } } @@ -285,77 +285,77 @@ { switch(imageViewType) { - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - if(coordinateIndex == 3) - { - return ADDRESSING_LAYER; - } - // Fall through to CUBE case: - case VK_IMAGE_VIEW_TYPE_CUBE: - if(coordinateIndex <= 1) // Cube faces themselves are addressed as 2D images. - { - // Vulkan 1.1 spec: - // "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then - // VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE is used, and if VK_FILTER_LINEAR is used within a mip level then sampling at the edges - // is performed as described earlier in the Cube map edge handling section." - // This corresponds with our 'SEAMLESS' addressing mode. - return ADDRESSING_SEAMLESS; - } - else if(coordinateIndex == 2) - { - // The cube face is an index into array layers. - return ADDRESSING_CUBEFACE; - } - else - { - return ADDRESSING_UNUSED; - } - break; + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if(coordinateIndex == 3) + { + return ADDRESSING_LAYER; + } + // Fall through to CUBE case: + case VK_IMAGE_VIEW_TYPE_CUBE: + if(coordinateIndex <= 1) // Cube faces themselves are addressed as 2D images. + { + // Vulkan 1.1 spec: + // "Cube images ignore the wrap modes specified in the sampler. 
Instead, if VK_FILTER_NEAREST is used within a mip level then + // VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE is used, and if VK_FILTER_LINEAR is used within a mip level then sampling at the edges + // is performed as described earlier in the Cube map edge handling section." + // This corresponds with our 'SEAMLESS' addressing mode. + return ADDRESSING_SEAMLESS; + } + else if(coordinateIndex == 2) + { + // The cube face is an index into array layers. + return ADDRESSING_CUBEFACE; + } + else + { + return ADDRESSING_UNUSED; + } + break; - case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. TODO(b/134669567) - if(coordinateIndex == 1) - { + case VK_IMAGE_VIEW_TYPE_1D: // Treated as 2D texture with second coordinate 0. TODO(b/134669567) + if(coordinateIndex == 1) + { + return ADDRESSING_WRAP; + } + else if(coordinateIndex >= 2) + { + return ADDRESSING_UNUSED; + } + break; + + case VK_IMAGE_VIEW_TYPE_3D: + if(coordinateIndex >= 3) + { + return ADDRESSING_UNUSED; + } + break; + + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. TODO(b/134669567) + if(coordinateIndex == 1) + { + return ADDRESSING_WRAP; + } + // Fall through to 2D_ARRAY case: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + if(coordinateIndex == 2) + { + return ADDRESSING_LAYER; + } + else if(coordinateIndex >= 3) + { + return ADDRESSING_UNUSED; + } + // Fall through to 2D case: + case VK_IMAGE_VIEW_TYPE_2D: + if(coordinateIndex >= 2) + { + return ADDRESSING_UNUSED; + } + break; + + default: + UNIMPLEMENTED("imageViewType %d", imageViewType); return ADDRESSING_WRAP; - } - else if(coordinateIndex >= 2) - { - return ADDRESSING_UNUSED; - } - break; - - case VK_IMAGE_VIEW_TYPE_3D: - if(coordinateIndex >= 3) - { - return ADDRESSING_UNUSED; - } - break; - - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: // Treated as 2D texture with second coordinate 0. 
TODO(b/134669567) - if(coordinateIndex == 1) - { - return ADDRESSING_WRAP; - } - // Fall through to 2D_ARRAY case: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - if(coordinateIndex == 2) - { - return ADDRESSING_LAYER; - } - else if(coordinateIndex >= 3) - { - return ADDRESSING_UNUSED; - } - // Fall through to 2D case: - case VK_IMAGE_VIEW_TYPE_2D: - if(coordinateIndex >= 2) - { - return ADDRESSING_UNUSED; - } - break; - - default: - UNIMPLEMENTED("imageViewType %d", imageViewType); - return ADDRESSING_WRAP; } if(!sampler) @@ -373,23 +373,23 @@ VkSamplerAddressMode addressMode = VK_SAMPLER_ADDRESS_MODE_REPEAT; switch(coordinateIndex) { - case 0: addressMode = sampler->addressModeU; break; - case 1: addressMode = sampler->addressModeV; break; - case 2: addressMode = sampler->addressModeW; break; - default: UNSUPPORTED("coordinateIndex: %d", coordinateIndex); + case 0: addressMode = sampler->addressModeU; break; + case 1: addressMode = sampler->addressModeV; break; + case 2: addressMode = sampler->addressModeW; break; + default: UNSUPPORTED("coordinateIndex: %d", coordinateIndex); } switch(addressMode) { - case VK_SAMPLER_ADDRESS_MODE_REPEAT: return ADDRESSING_WRAP; - case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return ADDRESSING_MIRROR; - case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return ADDRESSING_CLAMP; - case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return ADDRESSING_BORDER; - case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return ADDRESSING_MIRRORONCE; - default: - UNIMPLEMENTED("addressMode %d", addressMode); - return ADDRESSING_WRAP; + case VK_SAMPLER_ADDRESS_MODE_REPEAT: return ADDRESSING_WRAP; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return ADDRESSING_MIRROR; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return ADDRESSING_CLAMP; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return ADDRESSING_BORDER; + case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return ADDRESSING_MIRRORONCE; + default: + UNIMPLEMENTED("addressMode %d", addressMode); + return 
ADDRESSING_WRAP; } } -} // namespace sw +} // namespace sw
diff --git a/src/Pipeline/SpirvShaderSpec.cpp b/src/Pipeline/SpirvShaderSpec.cpp index 2d3c402..3bfd153 100644 --- a/src/Pipeline/SpirvShaderSpec.cpp +++ b/src/Pipeline/SpirvShaderSpec.cpp
@@ -24,132 +24,132 @@ switch(opcode) { - case spv::OpIAdd: - case spv::OpISub: - case spv::OpIMul: - case spv::OpUDiv: - case spv::OpSDiv: - case spv::OpUMod: - case spv::OpSMod: - case spv::OpSRem: - case spv::OpShiftRightLogical: - case spv::OpShiftRightArithmetic: - case spv::OpShiftLeftLogical: - case spv::OpBitwiseOr: - case spv::OpLogicalOr: - case spv::OpBitwiseAnd: - case spv::OpLogicalAnd: - case spv::OpBitwiseXor: - case spv::OpLogicalEqual: - case spv::OpIEqual: - case spv::OpLogicalNotEqual: - case spv::OpINotEqual: - case spv::OpULessThan: - case spv::OpSLessThan: - case spv::OpUGreaterThan: - case spv::OpSGreaterThan: - case spv::OpULessThanEqual: - case spv::OpSLessThanEqual: - case spv::OpUGreaterThanEqual: - case spv::OpSGreaterThanEqual: - EvalSpecConstantBinaryOp(insn); - break; + case spv::OpIAdd: + case spv::OpISub: + case spv::OpIMul: + case spv::OpUDiv: + case spv::OpSDiv: + case spv::OpUMod: + case spv::OpSMod: + case spv::OpSRem: + case spv::OpShiftRightLogical: + case spv::OpShiftRightArithmetic: + case spv::OpShiftLeftLogical: + case spv::OpBitwiseOr: + case spv::OpLogicalOr: + case spv::OpBitwiseAnd: + case spv::OpLogicalAnd: + case spv::OpBitwiseXor: + case spv::OpLogicalEqual: + case spv::OpIEqual: + case spv::OpLogicalNotEqual: + case spv::OpINotEqual: + case spv::OpULessThan: + case spv::OpSLessThan: + case spv::OpUGreaterThan: + case spv::OpSGreaterThan: + case spv::OpULessThanEqual: + case spv::OpSLessThanEqual: + case spv::OpUGreaterThanEqual: + case spv::OpSGreaterThanEqual: + EvalSpecConstantBinaryOp(insn); + break; - case spv::OpSConvert: - case spv::OpFConvert: - case spv::OpUConvert: - case spv::OpSNegate: - case spv::OpNot: - case spv::OpLogicalNot: - case spv::OpQuantizeToF16: - EvalSpecConstantUnaryOp(insn); - break; + case spv::OpSConvert: + case spv::OpFConvert: + case spv::OpUConvert: + case spv::OpSNegate: + case spv::OpNot: + case spv::OpLogicalNot: + case spv::OpQuantizeToF16: + EvalSpecConstantUnaryOp(insn); + 
break; - case spv::OpSelect: - { - auto &result = CreateConstant(insn); - auto const &cond = getObject(insn.word(4)); - auto condIsScalar = (getType(cond.type).sizeInComponents == 1); - auto const &left = getObject(insn.word(5)); - auto const &right = getObject(insn.word(6)); - - for(auto i = 0u; i < getType(result.type).sizeInComponents; i++) + case spv::OpSelect: { - auto sel = cond.constantValue[condIsScalar ? 0 : i]; - result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i]; - } - break; - } + auto &result = CreateConstant(insn); + auto const &cond = getObject(insn.word(4)); + auto condIsScalar = (getType(cond.type).sizeInComponents == 1); + auto const &left = getObject(insn.word(5)); + auto const &right = getObject(insn.word(6)); - case spv::OpCompositeExtract: - { - auto &result = CreateConstant(insn); - auto const &compositeObject = getObject(insn.word(4)); - auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5)); - - for(auto i = 0u; i < getType(result.type).sizeInComponents; i++) - { - result.constantValue[i] = compositeObject.constantValue[firstComponent + i]; - } - break; - } - - case spv::OpCompositeInsert: - { - auto &result = CreateConstant(insn); - auto const &newPart = getObject(insn.word(4)); - auto const &oldObject = getObject(insn.word(5)); - auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6)); - - // old components before - for(auto i = 0u; i < firstNewComponent; i++) - { - result.constantValue[i] = oldObject.constantValue[i]; - } - // new part - for(auto i = 0u; i < getType(newPart.type).sizeInComponents; i++) - { - result.constantValue[firstNewComponent + i] = newPart.constantValue[i]; - } - // old components after - for(auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++) - { - result.constantValue[i] = oldObject.constantValue[i]; - } - break; - } - - 
case spv::OpVectorShuffle: - { - auto &result = CreateConstant(insn); - auto const &firstHalf = getObject(insn.word(4)); - auto const &secondHalf = getObject(insn.word(5)); - - for(auto i = 0u; i < getType(result.type).sizeInComponents; i++) - { - auto selector = insn.word(6 + i); - if(selector == static_cast<uint32_t>(-1)) + for(auto i = 0u; i < getType(result.type).sizeInComponents; i++) { - // Undefined value, we'll use zero - result.constantValue[i] = 0; + auto sel = cond.constantValue[condIsScalar ? 0 : i]; + result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i]; } - else if(selector < getType(firstHalf.type).sizeInComponents) - { - result.constantValue[i] = firstHalf.constantValue[selector]; - } - else - { - result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents]; - } + break; } - break; - } - default: - // Other spec constant ops are possible, but require capabilities that are - // not exposed in our Vulkan implementation (eg Kernel), so we should never - // get here for correct shaders. 
- UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str()); + case spv::OpCompositeExtract: + { + auto &result = CreateConstant(insn); + auto const &compositeObject = getObject(insn.word(4)); + auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5)); + + for(auto i = 0u; i < getType(result.type).sizeInComponents; i++) + { + result.constantValue[i] = compositeObject.constantValue[firstComponent + i]; + } + break; + } + + case spv::OpCompositeInsert: + { + auto &result = CreateConstant(insn); + auto const &newPart = getObject(insn.word(4)); + auto const &oldObject = getObject(insn.word(5)); + auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6)); + + // old components before + for(auto i = 0u; i < firstNewComponent; i++) + { + result.constantValue[i] = oldObject.constantValue[i]; + } + // new part + for(auto i = 0u; i < getType(newPart.type).sizeInComponents; i++) + { + result.constantValue[firstNewComponent + i] = newPart.constantValue[i]; + } + // old components after + for(auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++) + { + result.constantValue[i] = oldObject.constantValue[i]; + } + break; + } + + case spv::OpVectorShuffle: + { + auto &result = CreateConstant(insn); + auto const &firstHalf = getObject(insn.word(4)); + auto const &secondHalf = getObject(insn.word(5)); + + for(auto i = 0u; i < getType(result.type).sizeInComponents; i++) + { + auto selector = insn.word(6 + i); + if(selector == static_cast<uint32_t>(-1)) + { + // Undefined value, we'll use zero + result.constantValue[i] = 0; + } + else if(selector < getType(firstHalf.type).sizeInComponents) + { + result.constantValue[i] = firstHalf.constantValue[selector]; + } + else + { + result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents]; + } + } + break; + } + + default: + // 
Other spec constant ops are possible, but require capabilities that are + // not exposed in our Vulkan implementation (eg Kernel), so we should never + // get here for correct shaders. + UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str()); } } @@ -168,37 +168,37 @@ switch(opcode) { - case spv::OpSConvert: - case spv::OpFConvert: - case spv::OpUConvert: - UNREACHABLE("Not possible until we have multiple bit widths"); - break; + case spv::OpSConvert: + case spv::OpFConvert: + case spv::OpUConvert: + UNREACHABLE("Not possible until we have multiple bit widths"); + break; - case spv::OpSNegate: - v = -(int)l; - break; - case spv::OpNot: - case spv::OpLogicalNot: - v = ~l; - break; + case spv::OpSNegate: + v = -(int)l; + break; + case spv::OpNot: + case spv::OpLogicalNot: + v = ~l; + break; - case spv::OpQuantizeToF16: - { - // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit. - auto abs = bit_cast<float>(l & 0x7FFFFFFF); - auto sign = l & 0x80000000; - auto isZero = abs < 0.000061035f ? ~0u : 0u; - auto isInf = abs > 65504.0f ? ~0u : 0u; - auto isNaN = (abs != abs) ? ~0u : 0u; - auto isInfOrNan = isInf | isNaN; - v = l & 0xFFFFE000; - v &= ~isZero | 0x80000000; - v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v); - v |= isNaN & 0x400000; - break; - } - default: - UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str()); + case spv::OpQuantizeToF16: + { + // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit. + auto abs = bit_cast<float>(l & 0x7FFFFFFF); + auto sign = l & 0x80000000; + auto isZero = abs < 0.000061035f ? ~0u : 0u; + auto isInf = abs > 65504.0f ? ~0u : 0u; + auto isNaN = (abs != abs) ? 
~0u : 0u; + auto isInfOrNan = isInf | isNaN; + v = l & 0xFFFFE000; + v &= ~isZero | 0x80000000; + v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v); + v |= isNaN & 0x400000; + break; + } + default: + UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str()); } } } @@ -220,93 +220,93 @@ switch(opcode) { - case spv::OpIAdd: - v = l + r; - break; - case spv::OpISub: - v = l - r; - break; - case spv::OpIMul: - v = l * r; - break; - case spv::OpUDiv: - v = (r == 0) ? 0 : l / r; - break; - case spv::OpUMod: - v = (r == 0) ? 0 : l % r; - break; - case spv::OpSDiv: - if(r == 0) r = UINT32_MAX; - if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX; - v = static_cast<int32_t>(l) / static_cast<int32_t>(r); - break; - case spv::OpSRem: - if(r == 0) r = UINT32_MAX; - if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX; - v = static_cast<int32_t>(l) % static_cast<int32_t>(r); - break; - case spv::OpSMod: - if(r == 0) r = UINT32_MAX; - if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX; - // Test if a signed-multiply would be negative. - v = static_cast<int32_t>(l) % static_cast<int32_t>(r); - if((v & 0x80000000) != (r & 0x80000000)) - v += r; - break; - case spv::OpShiftRightLogical: - v = l >> r; - break; - case spv::OpShiftRightArithmetic: - v = static_cast<int32_t>(l) >> r; - break; - case spv::OpShiftLeftLogical: - v = l << r; - break; - case spv::OpBitwiseOr: - case spv::OpLogicalOr: - v = l | r; - break; - case spv::OpBitwiseAnd: - case spv::OpLogicalAnd: - v = l & r; - break; - case spv::OpBitwiseXor: - v = l ^ r; - break; - case spv::OpLogicalEqual: - case spv::OpIEqual: - v = (l == r) ? ~0u : 0u; - break; - case spv::OpLogicalNotEqual: - case spv::OpINotEqual: - v = (l != r) ? ~0u : 0u; - break; - case spv::OpULessThan: - v = l < r ? ~0u : 0u; - break; - case spv::OpSLessThan: - v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u; - break; - case spv::OpUGreaterThan: - v = l > r ? 
~0u : 0u; - break; - case spv::OpSGreaterThan: - v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u; - break; - case spv::OpULessThanEqual: - v = l <= r ? ~0u : 0u; - break; - case spv::OpSLessThanEqual: - v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u; - break; - case spv::OpUGreaterThanEqual: - v = l >= r ? ~0u : 0u; - break; - case spv::OpSGreaterThanEqual: - v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u; - break; - default: - UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str()); + case spv::OpIAdd: + v = l + r; + break; + case spv::OpISub: + v = l - r; + break; + case spv::OpIMul: + v = l * r; + break; + case spv::OpUDiv: + v = (r == 0) ? 0 : l / r; + break; + case spv::OpUMod: + v = (r == 0) ? 0 : l % r; + break; + case spv::OpSDiv: + if(r == 0) r = UINT32_MAX; + if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX; + v = static_cast<int32_t>(l) / static_cast<int32_t>(r); + break; + case spv::OpSRem: + if(r == 0) r = UINT32_MAX; + if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX; + v = static_cast<int32_t>(l) % static_cast<int32_t>(r); + break; + case spv::OpSMod: + if(r == 0) r = UINT32_MAX; + if(l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX; + // Test if a signed-multiply would be negative. + v = static_cast<int32_t>(l) % static_cast<int32_t>(r); + if((v & 0x80000000) != (r & 0x80000000)) + v += r; + break; + case spv::OpShiftRightLogical: + v = l >> r; + break; + case spv::OpShiftRightArithmetic: + v = static_cast<int32_t>(l) >> r; + break; + case spv::OpShiftLeftLogical: + v = l << r; + break; + case spv::OpBitwiseOr: + case spv::OpLogicalOr: + v = l | r; + break; + case spv::OpBitwiseAnd: + case spv::OpLogicalAnd: + v = l & r; + break; + case spv::OpBitwiseXor: + v = l ^ r; + break; + case spv::OpLogicalEqual: + case spv::OpIEqual: + v = (l == r) ? ~0u : 0u; + break; + case spv::OpLogicalNotEqual: + case spv::OpINotEqual: + v = (l != r) ? 
~0u : 0u; + break; + case spv::OpULessThan: + v = l < r ? ~0u : 0u; + break; + case spv::OpSLessThan: + v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u; + break; + case spv::OpUGreaterThan: + v = l > r ? ~0u : 0u; + break; + case spv::OpSGreaterThan: + v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u; + break; + case spv::OpULessThanEqual: + v = l <= r ? ~0u : 0u; + break; + case spv::OpSLessThanEqual: + v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u; + break; + case spv::OpUGreaterThanEqual: + v = l >= r ? ~0u : 0u; + break; + case spv::OpSGreaterThanEqual: + v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u; + break; + default: + UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str()); } } }
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp index b2a731a..615bcf3 100644 --- a/src/Pipeline/VertexProgram.cpp +++ b/src/Pipeline/VertexProgram.cpp
@@ -25,30 +25,27 @@ namespace sw { VertexProgram::VertexProgram( - const VertexProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader, - const vk::DescriptorSet::Bindings &descriptorSets) - : VertexRoutine(state, pipelineLayout, spirvShader), - descriptorSets(descriptorSets) + const VertexProcessor::State &state, + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets) + : VertexRoutine(state, pipelineLayout, spirvShader) + , descriptorSets(descriptorSets) { routine.setImmutableInputBuiltins(spirvShader); - routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { assert(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID))))); }); - routine.setInputBuiltin(spirvShader, spv::BuiltInInstanceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInInstanceIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { // TODO: we could do better here; we know InstanceIndex is uniform across all lanes assert(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID))))); }); - routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) - { + routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) { ASSERT(builtin.SizeInComponents == 1); value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width)); }); @@ -63,7 +60,7 @@ { } 
-void VertexProgram::program(Pointer<UInt> &batch, UInt& vertexCount) +void VertexProgram::program(Pointer<UInt> &batch, UInt &vertexCount) { auto it = spirvShader->inputBuiltins.find(spv::BuiltInVertexIndex); if(it != spirvShader->inputBuiltins.end()) @@ -71,8 +68,8 @@ assert(it->second.SizeInComponents == 1); routine.getVariable(it->second.Id)[it->second.FirstComponent] = - As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) + - Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex)))); + As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) + + Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex)))); } auto activeLaneMask = SIMD::Int(0xFFFFFFFF); @@ -82,4 +79,4 @@ spirvShader->emitEpilog(&routine); } -} // namepsace sw +} // namespace sw
diff --git a/src/Pipeline/VertexProgram.hpp b/src/Pipeline/VertexProgram.hpp index 9a14713..6abb727 100644 --- a/src/Pipeline/VertexProgram.hpp +++ b/src/Pipeline/VertexProgram.hpp
@@ -15,8 +15,8 @@ #ifndef sw_VertexProgram_hpp #define sw_VertexProgram_hpp -#include "VertexRoutine.hpp" #include "ShaderCore.hpp" +#include "VertexRoutine.hpp" namespace sw { @@ -26,19 +26,19 @@ { public: VertexProgram( - const VertexProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader, - const vk::DescriptorSet::Bindings &descriptorSets); + const VertexProcessor::State &state, + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets); virtual ~VertexProgram(); private: - void program(Pointer<UInt> &batch, UInt& vertexCount) override; + void program(Pointer<UInt> &batch, UInt &vertexCount) override; const vk::DescriptorSet::Bindings &descriptorSets; }; -} // namepsace sw +} // namespace sw -#endif // sw_VertexProgram_hpp +#endif // sw_VertexProgram_hpp
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp index 85c395a..517ac70 100644 --- a/src/Pipeline/VertexRoutine.cpp +++ b/src/Pipeline/VertexRoutine.cpp
@@ -16,20 +16,20 @@ #include "Constants.hpp" #include "SpirvShader.hpp" -#include "Device/Vertex.hpp" #include "Device/Renderer.hpp" -#include "Vulkan/VkDebug.hpp" +#include "Device/Vertex.hpp" #include "System/Half.hpp" +#include "Vulkan/VkDebug.hpp" namespace sw { VertexRoutine::VertexRoutine( - const VertexProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader) - : routine(pipelineLayout), - state(state), - spirvShader(spirvShader) + const VertexProcessor::State &state, + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader) + : routine(pipelineLayout) + , state(state) + , spirvShader(spirvShader) { spirvShader->emitProlog(&routine); } @@ -40,13 +40,13 @@ void VertexRoutine::generate() { - Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); - Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); - Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag)); + Pointer<Byte> cache = task + OFFSET(VertexTask, vertexCache); + Pointer<Byte> vertexCache = cache + OFFSET(VertexCache, vertex); + Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache, tag)); - UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); + UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask, vertexCount)); - constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); + constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants)); // Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer. // On a cache miss, process a SIMD width of consecutive indices from the input batch. 
They're written to the cache @@ -93,7 +93,7 @@ spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) { - Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4)); + Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i / 4)); UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4)); Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex)); UInt robustnessSize(0); @@ -129,19 +129,19 @@ Int4 minY = CmpNLE(-posW, posY); Int4 minZ = CmpNLE(Float4(0.0f), posZ); - clipFlags = Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)]; - clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)]; - clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)]; - clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)]; - clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)]; - clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)]; + clipFlags = Pointer<Int>(constants + OFFSET(Constants, maxX))[SignMask(maxX)]; + clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxY))[SignMask(maxY)]; + clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxZ))[SignMask(maxZ)]; + clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minX))[SignMask(minX)]; + clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minY))[SignMask(minY)]; + clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minZ))[SignMask(minZ)]; - Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); - Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); - Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); + Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + 
OFFSET(Constants, maxPos))); + Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants, maxPos))); + Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants, maxPos))); Int4 finiteXYZ = finiteX & finiteY & finiteZ; - clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)]; + clipFlags |= Pointer<Int>(constants + OFFSET(Constants, fini))[SignMask(finiteXYZ)]; } void VertexRoutine::computeCullMask() @@ -162,7 +162,7 @@ } Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch, - bool robustBufferAccess, UInt & robustnessSize, Int baseVertex) + bool robustBufferAccess, UInt &robustnessSize, Int baseVertex) { Vector4f v; // Because of the following rule in the Vulkan spec, we do not care if a very large negative @@ -193,7 +193,7 @@ switch(stream.type) { - case STREAMTYPE_FLOAT: + case STREAMTYPE_FLOAT: { if(stream.count == 0) { @@ -220,86 +220,86 @@ switch(stream.attribType) { - case SpirvShader::ATTRIBTYPE_INT: - if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); - if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); - if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); - if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); - break; - case SpirvShader::ATTRIBTYPE_UINT: - if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); - if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); - if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z)); - if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); - break; - default: - break; + case SpirvShader::ATTRIBTYPE_INT: + if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); + if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); + if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); + if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); + break; + case SpirvShader::ATTRIBTYPE_UINT: + if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); + if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); + if(stream.count >= 3) v.x = 
As<Float4>(UInt4(v.z)); + if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); + break; + default: + break; } } } break; - case STREAMTYPE_BYTE: - if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float - { - v.x = Float4(*Pointer<Byte4>(source0)); - v.y = Float4(*Pointer<Byte4>(source1)); - v.z = Float4(*Pointer<Byte4>(source2)); - v.w = Float4(*Pointer<Byte4>(source3)); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - - if(stream.normalized) + case STREAMTYPE_BYTE: + if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float { - if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); - if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); - if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); - if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); + v.x = Float4(*Pointer<Byte4>(source0)); + v.y = Float4(*Pointer<Byte4>(source1)); + v.z = Float4(*Pointer<Byte4>(source2)); + v.w = Float4(*Pointer<Byte4>(source3)); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + + if(stream.normalized) + { + if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + } } - } - else // Stream: UByte, Shader attrib: Int / UInt - { - v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); - v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); - v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); - v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - } - break; - case STREAMTYPE_SBYTE: - if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float - { - v.x = 
Float4(*Pointer<SByte4>(source0)); - v.y = Float4(*Pointer<SByte4>(source1)); - v.z = Float4(*Pointer<SByte4>(source2)); - v.w = Float4(*Pointer<SByte4>(source3)); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - - if(stream.normalized) + else // Stream: UByte, Shader attrib: Int / UInt { - if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); - if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); - if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); - if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); - } - } - else // Stream: SByte, Shader attrib: Int / UInt - { - v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); - v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); - v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); - v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); + v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); + v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); + v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); + v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - } - break; - case STREAMTYPE_COLOR: + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + } + break; + case STREAMTYPE_SBYTE: + if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float + { + v.x = Float4(*Pointer<SByte4>(source0)); + v.y = Float4(*Pointer<SByte4>(source1)); + v.z = Float4(*Pointer<SByte4>(source2)); + v.w = Float4(*Pointer<SByte4>(source3)); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + + if(stream.normalized) + { + if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)); + if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)); + if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)); + if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + 
OFFSET(Constants, unscaleSByte)); + } + } + else // Stream: SByte, Shader attrib: Int / UInt + { + v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); + v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); + v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); + v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + } + break; + case STREAMTYPE_COLOR: { - v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); - v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); - v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); - v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); + v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); + v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); transpose4x4(v.x, v.y, v.z, v.w); @@ -309,119 +309,119 @@ v.z = t; } break; - case STREAMTYPE_SHORT: - if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float - { - v.x = Float4(*Pointer<Short4>(source0)); - v.y = Float4(*Pointer<Short4>(source1)); - v.z = Float4(*Pointer<Short4>(source2)); - v.w = Float4(*Pointer<Short4>(source3)); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - - if(stream.normalized) + case STREAMTYPE_SHORT: + if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float { - if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); - if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); - if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + 
OFFSET(Constants,unscaleShort)); - if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); + v.x = Float4(*Pointer<Short4>(source0)); + v.y = Float4(*Pointer<Short4>(source1)); + v.z = Float4(*Pointer<Short4>(source2)); + v.w = Float4(*Pointer<Short4>(source3)); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + + if(stream.normalized) + { + if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)); + if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)); + if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)); + if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)); + } } - } - else // Stream: Short, Shader attrib: Int/UInt, no type conversion - { - v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); - v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); - v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); - v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - } - break; - case STREAMTYPE_USHORT: - if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float - { - v.x = Float4(*Pointer<UShort4>(source0)); - v.y = Float4(*Pointer<UShort4>(source1)); - v.z = Float4(*Pointer<UShort4>(source2)); - v.w = Float4(*Pointer<UShort4>(source3)); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - - if(stream.normalized) + else // Stream: Short, Shader attrib: Int/UInt, no type conversion { - if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); - if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); - if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); - if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); + v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); + v.y = 
As<Float4>(Int4(*Pointer<Short4>(source1))); + v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); + v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); } - } - else // Stream: UShort, Shader attrib: Int/UInt, no type conversion - { - v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); - v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); - v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); - v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - } - break; - case STREAMTYPE_INT: - if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float - { - v.x = Float4(*Pointer<Int4>(source0)); - v.y = Float4(*Pointer<Int4>(source1)); - v.z = Float4(*Pointer<Int4>(source2)); - v.w = Float4(*Pointer<Int4>(source3)); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - - if(stream.normalized) + break; + case STREAMTYPE_USHORT: + if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float { - if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); - if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); - if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); - if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); + v.x = Float4(*Pointer<UShort4>(source0)); + v.y = Float4(*Pointer<UShort4>(source1)); + v.z = Float4(*Pointer<UShort4>(source2)); + v.w = Float4(*Pointer<UShort4>(source3)); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + + if(stream.normalized) + { + if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); + if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); + if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); + if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, 
unscaleUShort)); + } } - } - else // Stream: Int, Shader attrib: Int/UInt, no type conversion - { - v.x = *Pointer<Float4>(source0); - v.y = *Pointer<Float4>(source1); - v.z = *Pointer<Float4>(source2); - v.w = *Pointer<Float4>(source3); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - } - break; - case STREAMTYPE_UINT: - if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float - { - v.x = Float4(*Pointer<UInt4>(source0)); - v.y = Float4(*Pointer<UInt4>(source1)); - v.z = Float4(*Pointer<UInt4>(source2)); - v.w = Float4(*Pointer<UInt4>(source3)); - - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - - if(stream.normalized) + else // Stream: UShort, Shader attrib: Int/UInt, no type conversion { - if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); - if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); - if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); - if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); - } - } - else // Stream: UInt, Shader attrib: Int/UInt, no type conversion - { - v.x = *Pointer<Float4>(source0); - v.y = *Pointer<Float4>(source1); - v.z = *Pointer<Float4>(source2); - v.w = *Pointer<Float4>(source3); + v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); + v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); + v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); + v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); - transpose4xN(v.x, v.y, v.z, v.w, stream.count); - } - break; - case STREAMTYPE_HALF: + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + } + break; + case STREAMTYPE_INT: + if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float + { + v.x = Float4(*Pointer<Int4>(source0)); + v.y = Float4(*Pointer<Int4>(source1)); + v.z = Float4(*Pointer<Int4>(source2)); + v.w = Float4(*Pointer<Int4>(source3)); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + + 
if(stream.normalized) + { + if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); + if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); + if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); + if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); + } + } + else // Stream: Int, Shader attrib: Int/UInt, no type conversion + { + v.x = *Pointer<Float4>(source0); + v.y = *Pointer<Float4>(source1); + v.z = *Pointer<Float4>(source2); + v.w = *Pointer<Float4>(source3); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + } + break; + case STREAMTYPE_UINT: + if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float + { + v.x = Float4(*Pointer<UInt4>(source0)); + v.y = Float4(*Pointer<UInt4>(source1)); + v.z = Float4(*Pointer<UInt4>(source2)); + v.w = Float4(*Pointer<UInt4>(source3)); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + + if(stream.normalized) + { + if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); + if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); + if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); + if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); + } + } + else // Stream: UInt, Shader attrib: Int/UInt, no type conversion + { + v.x = *Pointer<Float4>(source0); + v.y = *Pointer<Float4>(source1); + v.z = *Pointer<Float4>(source2); + v.w = *Pointer<Float4>(source3); + + transpose4xN(v.x, v.y, v.z, v.w, stream.count); + } + break; + case STREAMTYPE_HALF: { if(stream.count >= 1) { @@ -430,10 +430,10 @@ UShort x2 = *Pointer<UShort>(source2 + 0); UShort x3 = *Pointer<UShort>(source3 + 0); - v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); - v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 
4); - v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); - v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); + v.x.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x0) * 4); + v.x.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x1) * 4); + v.x.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x2) * 4); + v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4); } if(stream.count >= 2) @@ -443,10 +443,10 @@ UShort y2 = *Pointer<UShort>(source2 + 2); UShort y3 = *Pointer<UShort>(source3 + 2); - v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); - v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); - v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); - v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); + v.y.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y0) * 4); + v.y.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y1) * 4); + v.y.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y2) * 4); + v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4); } if(stream.count >= 3) @@ -456,10 +456,10 @@ UShort z2 = *Pointer<UShort>(source2 + 4); UShort z3 = *Pointer<UShort>(source3 + 4); - v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); - v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); - v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); - v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); + v.z.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z0) * 4); + v.z.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z1) * 4); + v.z.z = *Pointer<Float>(constants + OFFSET(Constants, 
half2float) + Int(z2) * 4); + v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4); } if(stream.count >= 4) @@ -469,14 +469,14 @@ UShort w2 = *Pointer<UShort>(source2 + 6); UShort w3 = *Pointer<UShort>(source3 + 6); - v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); - v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); - v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); - v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); + v.w.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w0) * 4); + v.w.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w1) * 4); + v.w.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w2) * 4); + v.w.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w3) * 4); } } break; - case STREAMTYPE_2_10_10_10_INT: + case STREAMTYPE_2_10_10_10_INT: { Int4 src; src = Insert(src, *Pointer<Int>(source0), 0); @@ -498,7 +498,7 @@ } } break; - case STREAMTYPE_2_10_10_10_UINT: + case STREAMTYPE_2_10_10_10_UINT: { Int4 src; src = Insert(src, *Pointer<Int>(source0), 0); @@ -520,8 +520,8 @@ } } break; - default: - UNSUPPORTED("stream.type %d", int(stream.type)); + default: + UNSUPPORTED("stream.type %d", int(stream.type)); } if(stream.count < 1) v.x = Float4(0.0f); @@ -567,17 +567,17 @@ Float4 rhw = Float4(1.0f) / w; Vector4f proj; - proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF)))); - proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF)))); + proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF)))); + proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * 
*Pointer<Float4>(data + OFFSET(DrawData, HxF)))); proj.z = pos.z * rhw; proj.w = rhw; transpose4x4(pos.x, pos.y, pos.z, pos.w); - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,position), 16) = pos.w; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,position), 16) = pos.z; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,position), 16) = pos.y; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,position), 16) = pos.x; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, position), 16) = pos.w; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, position), 16) = pos.z; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, position), 16) = pos.y; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, position), 16) = pos.x; it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize); if(it != spirvShader->outputBuiltins.end()) @@ -585,10 +585,10 @@ ASSERT(it->second.SizeInComponents == 1); auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent]; - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, 2); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, 1); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, pointSize)) = Extract(psize, 3); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, pointSize)) = Extract(psize, 2); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, pointSize)) = Extract(psize, 1); + *Pointer<Float>(vertexCache + 
sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, pointSize)) = Extract(psize, 0); } it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance); @@ -598,10 +598,10 @@ for(unsigned int i = 0; i < count; i++) { auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i]; - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 3); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 2); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 1); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 0); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 3); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 2); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 1); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 0); } } @@ -612,29 +612,29 @@ for(unsigned int i = 0; i < count; i++) { auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i]; - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 3); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 2); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 1); - *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 0); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 3); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + 
OFFSET(Vertex, cullDistance[i])) = Extract(dist, 2); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 1); + *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 0); } } - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF; - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF; - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8) & 0x0000000FF; - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0) & 0x0000000FF; + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 24) & 0x0000000FF; + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 16) & 0x0000000FF; + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 8) & 0x0000000FF; + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 0) & 0x0000000FF; - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullMask)) = -((cullMask >> 3) & 1); - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullMask)) = -((cullMask >> 2) & 1); - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullMask)) = -((cullMask >> 1) & 1); - *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullMask)) = -((cullMask >> 0) & 1); + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullMask)) = -((cullMask >> 3) & 1); + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullMask)) = -((cullMask >> 2) & 1); + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + 
OFFSET(Vertex, cullMask)) = -((cullMask >> 1) & 1); + *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullMask)) = -((cullMask >> 0) & 1); transpose4x4(proj.x, proj.y, proj.z, proj.w); - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,projected), 16) = proj.z; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,projected), 16) = proj.y; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,projected), 16) = proj.x; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, projected), 16) = proj.w; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, projected), 16) = proj.z; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, projected), 16) = proj.y; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, projected), 16) = proj.x; for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) { @@ -651,22 +651,22 @@ transpose4x4(v.x, v.y, v.z, v.w); - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,v[i]), 16) = v.w; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,v[i]), 16) = v.z; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,v[i]), 16) = v.y; - *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,v[i]), 16) = v.x; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, v[i]), 16) = v.w; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, v[i]), 16) = v.z; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, v[i]), 16) = v.y; + *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, v[i]), 16) = v.x; } } } void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, 
Pointer<Byte> &cacheEntry) { - *Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position)); - *Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize)); + *Pointer<Int4>(vertex + OFFSET(Vertex, position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, position)); + *Pointer<Int>(vertex + OFFSET(Vertex, pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, pointSize)); - *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags)); - *Pointer<Int>(vertex + OFFSET(Vertex,cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,cullMask)); - *Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected)); + *Pointer<Int>(vertex + OFFSET(Vertex, clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, clipFlags)); + *Pointer<Int>(vertex + OFFSET(Vertex, cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, cullMask)); + *Pointer<Int4>(vertex + OFFSET(Vertex, projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, projected)); for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) {
diff --git a/src/Pipeline/VertexRoutine.hpp b/src/Pipeline/VertexRoutine.hpp index ab458b4..c490cd7 100644 --- a/src/Pipeline/VertexRoutine.hpp +++ b/src/Pipeline/VertexRoutine.hpp
@@ -20,14 +20,21 @@ #include "Device/Color.hpp" #include "Device/VertexProcessor.hpp" -namespace vk { class PipelineLayout; } +namespace vk { +class PipelineLayout; +} namespace sw { class VertexRoutinePrototype : public VertexRoutineFunction { public: - VertexRoutinePrototype() : vertex(Arg<0>()), batch(Arg<1>()), task(Arg<2>()), data(Arg<3>()) {} + VertexRoutinePrototype() + : vertex(Arg<0>()) + , batch(Arg<1>()) + , task(Arg<2>()) + , data(Arg<3>()) + {} virtual ~VertexRoutinePrototype() {} protected: @@ -41,9 +48,9 @@ { public: VertexRoutine( - const VertexProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader); + const VertexProcessor::State &state, + vk::PipelineLayout const *pipelineLayout, + SpirvShader const *spirvShader); virtual ~VertexRoutine(); void generate(); @@ -57,15 +64,15 @@ SpirvRoutine routine; const VertexProcessor::State &state; - SpirvShader const * const spirvShader; + SpirvShader const *const spirvShader; private: - virtual void program(Pointer<UInt> &batch, UInt& vertexCount) = 0; + virtual void program(Pointer<UInt> &batch, UInt &vertexCount) = 0; typedef VertexProcessor::State::Input Stream; Vector4f readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch, - bool robustBufferAccess, UInt& robustnessSize, Int baseVertex); + bool robustBufferAccess, UInt &robustnessSize, Int baseVertex); void readInput(Pointer<UInt> &batch); void computeClipFlags(); void computeCullMask(); @@ -75,4 +82,4 @@ } // namespace sw -#endif // sw_VertexRoutine_hpp +#endif // sw_VertexRoutine_hpp