Add support for configurable subpixel precision
Bug: b/141676114
Change-Id: I47e7d90e14b44533e64d352ecc6440495c0b7d3f
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36597
Presubmit-Ready: Antonio Maiorano <amaiorano@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index de4e9af..2f137b8 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -330,16 +330,17 @@
float N = viewport.minDepth;
float F = viewport.maxDepth;
float Z = F - N;
+ constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
if(context->isDrawTriangle(false))
{
N += context->depthBias;
}
- data->Wx16 = replicate(W * 16);
- data->Hx16 = replicate(H * 16);
- data->X0x16 = replicate(X0 * 16 - 8);
- data->Y0x16 = replicate(Y0 * 16 - 8);
+ data->WxF = replicate(W * subPixF);
+ data->HxF = replicate(H * subPixF);
+ data->X0xF = replicate(X0 * subPixF - subPixF / 2);
+ data->Y0xF = replicate(Y0 * subPixF - subPixF / 2);
data->halfPixelX = replicate(0.5f / W);
data->halfPixelY = replicate(0.5f / H);
data->viewportHeight = abs(viewport.height);
@@ -799,8 +800,10 @@
return false;
}
- const float W = data.Wx16[0] * (1.0f / 16.0f);
- const float H = data.Hx16[0] * (1.0f / 16.0f);
+ constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
+
+ const float W = data.WxF[0] * (1.0f / subPixF);
+ const float H = data.HxF[0] * (1.0f / subPixF);
float dx = W * (P1.x / P1.w - P0.x / P0.w);
float dy = H * (P1.y / P1.w - P0.y / P0.w);
@@ -1029,8 +1032,10 @@
triangle.v1 = triangle.v0;
triangle.v2 = triangle.v0;
- triangle.v1.projected.x += iround(16 * 0.5f * pSize);
- triangle.v2.projected.y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
+ constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
+
+ triangle.v1.projected.x += iround(subPixF * 0.5f * pSize);
+ triangle.v2.projected.y -= iround(subPixF * 0.5f * pSize) * (data.HxF[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
return setupRoutine(&primitive, &triangle, &polygon, &data);
}
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index 9b45f74..11a58ea 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -79,10 +79,10 @@
PixelProcessor::Factor factor;
unsigned int occlusion[MaxClusterCount]; // Number of pixels passing depth test
- float4 Wx16;
- float4 Hx16;
- float4 X0x16;
- float4 Y0x16;
+ float4 WxF;
+ float4 HxF;
+ float4 X0xF;
+ float4 Y0xF;
float4 halfPixelX;
float4 halfPixelY;
float viewportHeight;
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index 5095cbe..d8595b5 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -290,29 +290,18 @@
sRGBtoLinear12_16[i] = (unsigned short)(clamp(sw::sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
}
- // VK_SAMPLE_COUNT_4_BIT
- // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling
- constexpr float sampleLocations4[][2] = {
- {0.375, 0.125},
- {0.875, 0.375},
- {0.125, 0.625},
- {0.625, 0.875},
- };
-
- // Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down.
- // Convert to our space, with 0,0 in center, and Y+ going up.
constexpr float4 X[4] = {
- sw::replicate(sampleLocations4[0][0] - 0.5f), // -0.125
- sw::replicate(sampleLocations4[1][0] - 0.5f), // +0.375
- sw::replicate(sampleLocations4[2][0] - 0.5f), // -0.375
- sw::replicate(sampleLocations4[3][0] - 0.5f), // +0.125
+ sw::replicate(SampleLocationsX[0]),
+ sw::replicate(SampleLocationsX[1]),
+ sw::replicate(SampleLocationsX[2]),
+ sw::replicate(SampleLocationsX[3]),
};
constexpr float4 Y[4] = {
- sw::replicate(-(sampleLocations4[0][1] - 0.5f)), // +0.375
- sw::replicate(-(sampleLocations4[1][1] - 0.5f)), // +0.125
- sw::replicate(-(sampleLocations4[2][1] - 0.5f)), // -0.125
- sw::replicate(-(sampleLocations4[3][1] - 0.5f)), // -0.375
+ sw::replicate(SampleLocationsY[0]),
+ sw::replicate(SampleLocationsY[1]),
+ sw::replicate(SampleLocationsY[2]),
+ sw::replicate(SampleLocationsY[3]),
};
for(int q = 0; q < 4; q++)
@@ -332,8 +321,11 @@
}
}
- const int Xf[4] = {-5, +5, +2, -2}; // Fragment offsets
- const int Yf[4] = {-2, +2, -5, +5}; // Fragment offsets
+ constexpr auto subPixB = vk::SUBPIXEL_PRECISION_BITS;
+
+ // Reorder sample points for fragment offset computation
+ const int Xf[4] = { toFixedPoint(X[2][0], subPixB), toFixedPoint(X[1][0], subPixB), toFixedPoint(X[3][0], subPixB), toFixedPoint(X[0][0], subPixB) };
+ const int Yf[4] = { toFixedPoint(Y[2][0], subPixB), toFixedPoint(Y[1][0], subPixB), toFixedPoint(Y[3][0], subPixB), toFixedPoint(Y[0][0], subPixB) };
memcpy(&this->Xf, &Xf, sizeof(Xf));
memcpy(&this->Yf, &Yf, sizeof(Yf));
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index 861887c..58c8e10 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -16,6 +16,8 @@
#define sw_Constants_hpp
#include "System/Types.hpp"
+#include "System/Math.hpp"
+#include "Vulkan/VkConfig.h"
namespace sw
{
@@ -86,6 +88,36 @@
float4 X[4];
float4 Y[4];
+ // VK_SAMPLE_COUNT_4_BIT
+ // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#primsrast-multisampling
+ static constexpr float VkSampleLocations4[][2] = {
+ {0.375, 0.125},
+ {0.875, 0.375},
+ {0.125, 0.625},
+ {0.625, 0.875},
+ };
+
+ // Vulkan spec sample positions are relative to 0,0 in top left corner, with Y+ going down.
+ // Convert to our space, with 0,0 in center, and Y+ going up.
+ static constexpr float SampleLocationsX[4] = {
+ VkSampleLocations4[0][0] - 0.5f,
+ VkSampleLocations4[1][0] - 0.5f,
+ VkSampleLocations4[2][0] - 0.5f,
+ VkSampleLocations4[3][0] - 0.5f,
+ };
+
+ static constexpr float SampleLocationsY[4] = {
+ -(VkSampleLocations4[0][1] - 0.5f),
+ -(VkSampleLocations4[1][1] - 0.5f),
+ -(VkSampleLocations4[2][1] - 0.5f),
+ -(VkSampleLocations4[3][1] - 0.5f),
+ };
+
+ // Offsets added to the fixed-point yMin and yMax before the right shift by SUBPIXEL_PRECISION_BITS when multisampling; they are just
+ // large enough (+/- max range - epsilon) to include sample points. NOTE(review): both use the maximum Y sample location, which presumes a sample pattern symmetric about the pixel center - confirm.
+ static constexpr int yMinMultiSampleOffset = sw::toFixedPoint(1, vk::SUBPIXEL_PRECISION_BITS) - sw::toFixedPoint(sw::max(SampleLocationsY[0], SampleLocationsY[1], SampleLocationsY[2], SampleLocationsY[3]), vk::SUBPIXEL_PRECISION_BITS) - 1;
+ static constexpr int yMaxMultiSampleOffset = sw::toFixedPoint(1, vk::SUBPIXEL_PRECISION_BITS) + sw::toFixedPoint(sw::max(SampleLocationsY[0], SampleLocationsY[1], SampleLocationsY[2], SampleLocationsY[3]), vk::SUBPIXEL_PRECISION_BITS) - 1;
+
dword maxX[16];
dword maxY[16];
dword maxZ[16];
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index 37cc5d5..7ebd9ef 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -139,8 +139,8 @@
Float w = v.w;
Float rhw = IfThenElse(w != 0.0f, 1.0f / w, Float(1.0f));
- X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,Wx16)));
- Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,Hx16)));
+ X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0xF)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,WxF)));
+ Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0xF)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,HxF)));
i++;
}
@@ -162,15 +162,19 @@
}
Until(i >= n)
+ constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS;
+ constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK;
+ constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
+
if(state.multiSample > 1)
{
- yMin = (yMin + 0x0A) >> 4;
- yMax = (yMax + 0x14) >> 4;
+ yMin = (yMin + Constants::yMinMultiSampleOffset) >> subPixB;
+ yMax = (yMax + Constants::yMaxMultiSampleOffset) >> subPixB;
}
else
{
- yMin = (yMin + 0x0F) >> 4;
- yMax = (yMax + 0x0F) >> 4;
+ yMin = (yMin + subPixM) >> subPixB;
+ yMax = (yMax + subPixM) >> subPixB;
}
yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
@@ -213,7 +217,7 @@
{
Int xMin = *Pointer<Int>(data + OFFSET(DrawData, scissorX0));
Int xMax = *Pointer<Int>(data + OFFSET(DrawData, scissorX1));
- Short x = Short(Clamp((X[0] + 0xF) >> 4, xMin, xMax));
+ Short x = Short(Clamp((X[0] + subPixM) >> subPixB, xMin, xMax));
For(Int y = yMin - 1, y < yMax + 1, y++)
{
@@ -323,8 +327,8 @@
Y2 = Y1 + X0 - X1;
}
- Float dx = Float(X0) * (1.0f / 16.0f);
- Float dy = Float(Y0) * (1.0f / 16.0f);
+ Float dx = Float(X0) * (1.0f / subPixF);
+ Float dy = Float(Y0) * (1.0f / subPixF);
X1 -= X0;
Y1 -= Y0;
@@ -332,11 +336,11 @@
X2 -= X0;
Y2 -= Y0;
- Float x1 = w1 * (1.0f / 16.0f) * Float(X1);
- Float y1 = w1 * (1.0f / 16.0f) * Float(Y1);
+ Float x1 = w1 * (1.0f / subPixF) * Float(X1);
+ Float y1 = w1 * (1.0f / subPixF) * Float(Y1);
- Float x2 = w2 * (1.0f / 16.0f) * Float(X2);
- Float y2 = w2 * (1.0f / 16.0f) * Float(Y2);
+ Float x2 = w2 * (1.0f / subPixF) * Float(X2);
+ Float y2 = w2 * (1.0f / subPixF) * Float(Y2);
Float a = x1 * y2 - x2 * y1;
@@ -403,10 +407,10 @@
if(!point)
{
- Float x1 = Float(X1) * (1.0f / 16.0f);
- Float y1 = Float(Y1) * (1.0f / 16.0f);
- Float x2 = Float(X2) * (1.0f / 16.0f);
- Float y2 = Float(Y2) * (1.0f / 16.0f);
+ Float x1 = Float(X1) * (1.0f / subPixF);
+ Float y1 = Float(Y1) * (1.0f / subPixF);
+ Float x2 = Float(X2) * (1.0f / subPixF);
+ Float y2 = Float(Y2) * (1.0f / subPixF);
Float D = *Pointer<Float>(data + OFFSET(DrawData,depthRange)) / (x1 * y2 - x2 * y1);
@@ -509,8 +513,11 @@
Int Y1 = IfThenElse(swap, Yb, Ya);
Int Y2 = IfThenElse(swap, Ya, Yb);
- Int y1 = Max((Y1 + 0x0000000F) >> 4, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
- Int y2 = Min((Y2 + 0x0000000F) >> 4, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
+ constexpr int subPixB = vk::SUBPIXEL_PRECISION_BITS;
+ constexpr int subPixM = vk::SUBPIXEL_PRECISION_MASK;
+
+ Int y1 = Max((Y1 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
+ Int y2 = Min((Y2 + subPixM) >> subPixB, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
If(y1 < y2)
{
@@ -525,11 +532,11 @@
Int DX12 = X2 - X1;
Int DY12 = Y2 - Y1;
- Int FDX12 = DX12 << 4;
- Int FDY12 = DY12 << 4;
+ Int FDX12 = DX12 << subPixB;
+ Int FDY12 = DY12 << subPixB;
- Int X = DX12 * ((y1 << 4) - Y1) + (X1 & 0x0000000F) * DY12;
- Int x = (X1 >> 4) + X / FDY12; // Edge
+ Int X = DX12 * ((y1 << subPixB) - Y1) + (X1 & subPixM) * DY12;
+ Int x = (X1 >> subPixB) + X / FDY12; // Edge
Int d = X % FDY12; // Error-term
Int ceil = -d >> 31; // Ceiling division: remainder <= 0
x -= ceil;
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index acc13cd..baa69e4 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -543,8 +543,8 @@
Float4 rhw = Float4(1.0f) / w;
Vector4f proj;
- proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16))));
- proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16))));
+ proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF))));
+ proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF))));
proj.z = pos.z * rhw;
proj.w = rhw;
diff --git a/src/System/Math.hpp b/src/System/Math.hpp
index 7bd3890..efef5fd 100644
--- a/src/System/Math.hpp
+++ b/src/System/Math.hpp
@@ -32,37 +32,37 @@
#undef max
+ // Returns the larger of a and b (b when neither is greater). constexpr so it can be used in constant expressions.
template<class T>
- inline T max(T a, T b)
+ inline constexpr T max(T a, T b)
{
return a > b ? a : b;
}
template<class T>
- inline T min(T a, T b)
+ inline constexpr T min(T a, T b)
{
return a < b ? a : b;
}
template<class T>
- inline T max(T a, T b, T c)
+ inline constexpr T max(T a, T b, T c)
{
return max(max(a, b), c);
}
template<class T>
- inline T min(T a, T b, T c)
+ inline constexpr T min(T a, T b, T c)
{
return min(min(a, b), c);
}
template<class T>
- inline T max(T a, T b, T c, T d)
+ inline constexpr T max(T a, T b, T c, T d)
{
return max(max(a, b), max(c, d));
}
template<class T>
- inline T min(T a, T b, T c, T d)
+ inline constexpr T min(T a, T b, T c, T d)
{
return min(min(a, b), min(c, d));
}
@@ -372,6 +372,11 @@
{
return static_cast<int>(min(x, 0x7FFFFFFFu));
}
+
+ // Convert floating-point value v to fixed point with p fractional (binary) bits: scales v by 2^p and truncates toward zero
+ constexpr int toFixedPoint(float v, int p) {
+ return static_cast<int>(v * (1 << p));
+ }
}
#endif // sw_Math_hpp
diff --git a/src/Vulkan/VkConfig.h b/src/Vulkan/VkConfig.h
index 905fcf2..3bc9819 100644
--- a/src/Vulkan/VkConfig.h
+++ b/src/Vulkan/VkConfig.h
@@ -78,6 +78,10 @@
MAX_POINT_SIZE = 1, // Large points are not supported. If/when we turn this on, must be >= 64.
};
+constexpr int SUBPIXEL_PRECISION_BITS = 4; // Number of subpixel (fractional) bits; also the value reported as subPixelPrecisionBits
+constexpr float SUBPIXEL_PRECISION_FACTOR = static_cast<float>(1 << SUBPIXEL_PRECISION_BITS); // 2^SUBPIXEL_PRECISION_BITS as a float scale factor
+constexpr int SUBPIXEL_PRECISION_MASK = 0xFFFFFFFF >> (32 - SUBPIXEL_PRECISION_BITS); // Low SUBPIXEL_PRECISION_BITS bits set. NOTE(review): the shift count becomes 32 (undefined) if BITS is ever 0
+
}
#endif // VK_CONFIG_HPP_
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 2ad8cf8..44a2ba4 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -200,7 +200,7 @@
{ 65535, 65535, 65535 }, // maxComputeWorkGroupCount[3]
128, // maxComputeWorkGroupInvocations
{ 128, 128, 64, }, // maxComputeWorkGroupSize[3]
- 4, // subPixelPrecisionBits
+ vk::SUBPIXEL_PRECISION_BITS, // subPixelPrecisionBits
4, // subTexelPrecisionBits
4, // mipmapPrecisionBits
UINT32_MAX, // maxDrawIndexedIndexValue