Device: Don't use a global for the cluster count.
Pass the cluster count to the QuadRasterizer as a routine argument instead of using a rather nasty back dependency on the renderer (Renderer::getClusterCount()).
This allows the cluster count to be adjusted per-draw without requiring synchronization around the count.
The primary goal of this change is to let us scale the number of clusters based on the complexity of the draw.
Bug: b/139142453
Change-Id: I0379e16568de402f186ee2dd1e8b2346bed30efd
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35571
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/PixelProcessor.hpp b/src/Device/PixelProcessor.hpp
index 17cda85..460afa4 100644
--- a/src/Device/PixelProcessor.hpp
+++ b/src/Device/PixelProcessor.hpp
@@ -119,7 +119,7 @@
};
public:
- typedef void (*RoutinePointer)(const Primitive *primitive, int count, int thread, DrawData *draw);
+ typedef void (*RoutinePointer)(const Primitive *primitive, int count, int cluster, int clusterCount, DrawData *draw);
PixelProcessor();
diff --git a/src/Device/QuadRasterizer.cpp b/src/Device/QuadRasterizer.cpp
index ce4dbf1..c756d6a 100644
--- a/src/Device/QuadRasterizer.cpp
+++ b/src/Device/QuadRasterizer.cpp
@@ -34,7 +34,6 @@
{
constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
occlusion = 0;
- int clusterCount = Renderer::getClusterCount();
Do
{
@@ -72,6 +71,8 @@
Pointer<Byte> zBuffer;
Pointer<Byte> sBuffer;
+ Int clusterCountLog2 = 31 - Ctlz(UInt(clusterCount), false);
+
for(int index = 0; index < RENDERTARGETS; index++)
{
if(state.colorWriteActive(index))
@@ -192,24 +193,22 @@
}
}
- int clusterCount = Renderer::getClusterCount();
-
for(int index = 0; index < RENDERTARGETS; index++)
{
if(state.colorWriteActive(index))
{
- cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + log2i(clusterCount)); // FIXME: Precompute
+ cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + clusterCountLog2); // FIXME: Precompute
}
}
if(state.depthTestActive)
{
- zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + log2i(clusterCount)); // FIXME: Precompute
+ zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + clusterCountLog2); // FIXME: Precompute
}
if(state.stencilActive)
{
- sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + log2i(clusterCount)); // FIXME: Precompute
+ sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + clusterCountLog2); // FIXME: Precompute
}
y += 2 * clusterCount;
diff --git a/src/Device/Rasterizer.hpp b/src/Device/Rasterizer.hpp
index c268e18..a206508 100644
--- a/src/Device/Rasterizer.hpp
+++ b/src/Device/Rasterizer.hpp
@@ -21,16 +21,17 @@
namespace sw
{
- class Rasterizer : public Function<Void(Pointer<Byte>, Int, Int, Pointer<Byte>)>
+ class Rasterizer : public Function<Void(Pointer<Byte>, Int, Int, Int, Pointer<Byte>)>
{
public:
- Rasterizer() : primitive(Arg<0>()), count(Arg<1>()), cluster(Arg<2>()), data(Arg<3>()) {}
+ Rasterizer() : primitive(Arg<0>()), count(Arg<1>()), cluster(Arg<2>()), clusterCount(Arg<3>()), data(Arg<4>()) {}
virtual ~Rasterizer() {}
protected:
Pointer<Byte> primitive;
Int count;
Int cluster;
+ Int clusterCount;
Pointer<Byte> data;
};
}
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index 7030897..628d549 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -691,7 +691,7 @@
DrawData *data = draw->data;
PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
- pixelRoutine(primitive, visible, cluster, data);
+ pixelRoutine(primitive, visible, cluster, clusterCount, data);
}
finishRendering(task[threadIndex]);