Vulkan: Optimize shaderClipDistance and shaderCullDistance
Only process clip/cull distances if they're actually output by the vertex shader.
There is still the overhead of storing these distances in each vertex / primitive, but the structure sizes should be optimized as part of a larger, separate set of changes.
Bug: b/139207336
Tests: dEQP-VK.clipping.*
Change-Id: I8f04b1c3ea823bb1a8cf62f18c987e01cd0c979a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35032
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Device/PixelProcessor.cpp b/src/Device/PixelProcessor.cpp
index e38991f..26731a2 100644
--- a/src/Device/PixelProcessor.cpp
+++ b/src/Device/PixelProcessor.cpp
@@ -154,6 +154,9 @@
{
State state;
+ state.numClipDistances = context->vertexShader->getNumOutputClipDistances();
+ state.numCullDistances = context->vertexShader->getNumOutputCullDistances();
+
if(context->pixelShader)
{
state.shaderID = context->pixelShader->getSerialID();
diff --git a/src/Device/PixelProcessor.hpp b/src/Device/PixelProcessor.hpp
index d6d8736..f657a59 100644
--- a/src/Device/PixelProcessor.hpp
+++ b/src/Device/PixelProcessor.hpp
@@ -63,6 +63,9 @@
uint64_t shaderID;
+ unsigned int numClipDistances;
+ unsigned int numCullDistances;
+
VkCompareOp depthCompareMode;
bool depthWriteEnable;
diff --git a/src/Device/QuadRasterizer.cpp b/src/Device/QuadRasterizer.cpp
index ca4dc2d..a3494d8 100644
--- a/src/Device/QuadRasterizer.cpp
+++ b/src/Device/QuadRasterizer.cpp
@@ -158,13 +158,13 @@
}
}
- for (int i = 0; i < MAX_CLIP_DISTANCES; i++)
+ for (unsigned int i = 0; i < state.numClipDistances; i++)
{
DclipDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, clipDistance[i].B), 16);
}
- for (int i = 0; i < MAX_CULL_DISTANCES; i++)
+ for (unsigned int i = 0; i < state.numCullDistances; i++)
{
DcullDistance[i] = *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].C), 16) +
yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive, cullDistance[i].B), 16);
diff --git a/src/Device/SetupProcessor.cpp b/src/Device/SetupProcessor.cpp
index 61634af..69371ab 100644
--- a/src/Device/SetupProcessor.cpp
+++ b/src/Device/SetupProcessor.cpp
@@ -81,6 +81,9 @@
state.multiSample = context->sampleCount;
state.rasterizerDiscard = context->rasterizerDiscard;
+ state.numClipDistances = context->vertexShader->getNumOutputClipDistances();
+ state.numCullDistances = context->vertexShader->getNumOutputCullDistances();
+
if (context->pixelShader)
{
for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
diff --git a/src/Device/SetupProcessor.hpp b/src/Device/SetupProcessor.hpp
index 335a6b2..683c93c 100644
--- a/src/Device/SetupProcessor.hpp
+++ b/src/Device/SetupProcessor.hpp
@@ -51,6 +51,8 @@
VkCullModeFlags cullMode : BITS(VK_CULL_MODE_FLAG_BITS_MAX_ENUM);
unsigned int multiSample : 3; // 1, 2 or 4
bool rasterizerDiscard : 1;
+ unsigned int numClipDistances : 4; // [0 - 8]
+ unsigned int numCullDistances : 4; // [0 - 8]
SpirvShader::InterfaceComponent gradient[MAX_INTERFACE_COMPONENTS];
};
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index eb60628..999afe8 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -163,7 +163,7 @@
setBuiltins(x, y, z, w, cMask);
- for (uint32_t i = 0; i < MAX_CLIP_DISTANCES; i++)
+ for (uint32_t i = 0; i < state.numClipDistances; i++)
{
auto distance = interpolate(xxxx, DclipDistance[i], rhw,
primitive + OFFSET(Primitive, clipDistance[i]),
@@ -197,12 +197,15 @@
auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
if(it != spirvShader->inputBuiltins.end())
{
- for (uint32_t i = 0; i < it->second.SizeInComponents; i++)
+ for (uint32_t i = 0; i < state.numCullDistances; i++)
{
- routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
- interpolate(xxxx, DcullDistance[i], rhw,
- primitive + OFFSET(Primitive, cullDistance[i]),
- false, true, false);
+ if (i < it->second.SizeInComponents)
+ {
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
+ interpolate(xxxx, DcullDistance[i], rhw,
+ primitive + OFFSET(Primitive, cullDistance[i]),
+ false, true, false);
+ }
}
}
}
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index 273f02c..dbe9feb 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -456,7 +456,7 @@
}
}
- for (int i = 0; i < MAX_CLIP_DISTANCES; i++)
+ for (unsigned int i = 0; i < state.numClipDistances; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, clipDistance[i]),
@@ -464,7 +464,7 @@
false, true);
}
- for (int i = 0; i < MAX_CULL_DISTANCES; i++)
+ for (unsigned int i = 0; i < state.numCullDistances; i++)
{
setupGradient(primitive, tri, w012, M, v0, v1, v2,
OFFSET(Vertex, cullDistance[i]),
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index a99fc6f..ad0f740 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -508,6 +508,36 @@
return capabilities;
}
+ // getNumOutputClipDistances() returns the SizeInComponents of the ClipDistance
+ // output builtin, or 0 if the capability is unused or the builtin is absent.
+ unsigned int getNumOutputClipDistances() const
+ {
+ if (getUsedCapabilities().ClipDistance)
+ {
+ auto it = outputBuiltins.find(spv::BuiltInClipDistance);
+ if(it != outputBuiltins.end())
+ {
+ return it->second.SizeInComponents;
+ }
+ }
+ return 0;
+ }
+
+ // getNumOutputCullDistances() returns the SizeInComponents of the CullDistance
+ // output builtin, or 0 if the capability is unused or the builtin is absent.
+ unsigned int getNumOutputCullDistances() const
+ {
+ if (getUsedCapabilities().CullDistance)
+ {
+ auto it = outputBuiltins.find(spv::BuiltInCullDistance);
+ if(it != outputBuiltins.end())
+ {
+ return it->second.SizeInComponents;
+ }
+ }
+ return 0;
+ }
+
enum AttribType : unsigned char
{
ATTRIBTYPE_FLOAT,
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index 317c4c6..2fccb08 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -148,18 +148,15 @@
{
cullMask = Int(15);
- if (spirvShader->getUsedCapabilities().CullDistance)
+ auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
+ if (it != spirvShader->outputBuiltins.end())
{
- auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
- if (it != spirvShader->outputBuiltins.end())
+ auto count = spirvShader->getNumOutputCullDistances();
+ for (uint32_t i = 0; i < count; i++)
{
- auto &var = routine.getVariable(it->second.Id);
- for (uint32_t i = 0; i < it->second.SizeInComponents; i++)
- {
- auto const &distance = var[it->second.FirstComponent + i];
- auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
- cullMask &= mask;
- }
+ auto const &distance = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
+ auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
+ cullMask &= mask;
}
}
}
@@ -594,55 +591,33 @@
*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
}
- uint32_t clipIndex = 0;
- if (spirvShader->getUsedCapabilities().ClipDistance)
+ it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
+ if(it != spirvShader->outputBuiltins.end())
{
- it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
- if(it != spirvShader->outputBuiltins.end())
+ auto count = spirvShader->getNumOutputClipDistances();
+ for(unsigned int i = 0; i < count; i++)
{
- ASSERT(it->second.SizeInComponents <= MAX_CLIP_DISTANCES);
- for(; clipIndex < it->second.SizeInComponents; clipIndex++)
- {
- auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + clipIndex];
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 3);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 2);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 1);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Extract(dist, 0);
- }
+ auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 3);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 2);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 1);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[i])) = Extract(dist, 0);
}
}
- for(; clipIndex < MAX_CLIP_DISTANCES; clipIndex++)
- {
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipDistance[clipIndex])) = Float(0);
- }
- uint32_t cullIndex = 0;
- if (spirvShader->getUsedCapabilities().CullDistance)
+ it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
+ if(it != spirvShader->outputBuiltins.end())
{
- it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
- if(it != spirvShader->outputBuiltins.end())
+ auto count = spirvShader->getNumOutputCullDistances();
+ for(unsigned int i = 0; i < count; i++)
{
- ASSERT(it->second.SizeInComponents <= MAX_CULL_DISTANCES);
- for(; cullIndex < it->second.SizeInComponents; cullIndex++)
- {
- auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + cullIndex];
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 3);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 2);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 1);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Extract(dist, 0);
- }
+ auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 3);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 2);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 1);
+ *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[i])) = Extract(dist, 0);
}
}
- for(; cullIndex < MAX_CULL_DISTANCES; cullIndex++)
- {
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
- *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,cullDistance[cullIndex])) = Float(0);
- }
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
@@ -700,11 +675,11 @@
*Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
}
}
- for(int i = 0; i < MAX_CLIP_DISTANCES; i++)
+ for(unsigned int i = 0; i < spirvShader->getNumOutputClipDistances(); i++)
{
*Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4);
}
- for(int i = 0; i < MAX_CULL_DISTANCES; i++)
+ for(unsigned int i = 0; i < spirvShader->getNumOutputCullDistances(); i++)
{
*Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4);
}