Support frustum clipping against z = -1.
This allows the depth range adjustment to be performed only once, as part of
the viewport transform, without it being affected by transform feedback.
Change-Id: I89fba3b131c234bea064b1b65f50b62261f4c307
Reviewed-on: https://swiftshader-review.googlesource.com/5421
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index ab7176b..cc29156 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -302,13 +302,9 @@
conditionalRotate2(wMax == w2, v0, v1, v2);
}
- Float4 p0 = *Pointer<Float4>(v0 + pos * 16, 16);
- Float4 p1 = *Pointer<Float4>(v1 + pos * 16, 16);
- Float4 p2 = *Pointer<Float4>(v2 + pos * 16, 16);
-
- Float w0 = p0.w;
- Float w1 = p1.w;
- Float w2 = p2.w;
+ Float w0 = *Pointer<Float>(v0 + pos * 16 + 12);
+ Float w1 = *Pointer<Float>(v1 + pos * 16 + 12);
+ Float w2 = *Pointer<Float>(v2 + pos * 16 + 12);
Float4 w012;
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 6428cdf..c67a8bc 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -72,13 +72,12 @@
UInt cacheIndex = index & 0x0000003F;
Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
- writeVertex(vertex, cacheLine, primitiveNumber, indexInPrimitive);
+ writeVertex(vertex, cacheLine);
- vertex += sizeof(Vertex);
- batch += sizeof(unsigned int);
- vertexCount--;
if(state.transformFeedbackEnabled != 0)
{
+ transformFeedback(vertex, primitiveNumber, indexInPrimitive);
+
indexInPrimitive++;
If(indexInPrimitive == 3)
{
@@ -86,6 +85,10 @@
indexInPrimitive = 0;
}
}
+
+ vertex += sizeof(Vertex);
+ batch += sizeof(unsigned int);
+ vertexCount--;
}
Until(vertexCount == 0)
@@ -107,41 +110,26 @@
{
int pos = state.positionRegister;
- Float4 outPosZ = o[pos].z;
- if(state.transformFeedbackEnabled && symmetricNormalizedDepth && !state.fixedFunction)
- {
- outPosZ = (outPosZ + o[pos].w) * Float4(0.5f);
- }
-
Int4 maxX = CmpLT(o[pos].w, o[pos].x);
Int4 maxY = CmpLT(o[pos].w, o[pos].y);
- Int4 maxZ = CmpLT(o[pos].w, outPosZ);
-
+ Int4 maxZ = CmpLT(o[pos].w, o[pos].z);
Int4 minX = CmpNLE(-o[pos].w, o[pos].x);
Int4 minY = CmpNLE(-o[pos].w, o[pos].y);
- Int4 minZ = CmpNLE(Float4(0.0f), outPosZ);
+ Int4 minZ = symmetricNormalizedDepth ? CmpNLE(-o[pos].w, o[pos].z) : CmpNLE(Float4(0.0f), o[pos].z);
- Int flags;
-
- flags = SignMask(maxX);
- clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
- flags = SignMask(maxY);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + flags * 4);
- flags = SignMask(maxZ);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + flags * 4);
- flags = SignMask(minX);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + flags * 4);
- flags = SignMask(minY);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + flags * 4);
- flags = SignMask(minZ);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + flags * 4);
+ clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4);
Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- Int4 finiteZ = CmpLE(Abs(outPosZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+ Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- flags = SignMask(finiteX & finiteY & finiteZ);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + flags * 4);
+ Int4 finiteXYZ = finiteX & finiteY & finiteZ;
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4);
if(state.preTransformed)
{
@@ -586,11 +574,6 @@
o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w;
o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w;
}
-
- if(!state.transformFeedbackEnabled && symmetricNormalizedDepth && !state.fixedFunction)
- {
- o[pos].z = (o[pos].z + o[pos].w) * Float4(0.5f);
- }
}
void VertexRoutine::writeCache(Pointer<Byte> &cacheLine)
@@ -661,6 +644,7 @@
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
+ // Viewport transform
int pos = state.positionRegister;
v.x = o[pos].x;
@@ -668,6 +652,11 @@
v.z = o[pos].z;
v.w = o[pos].w;
+ if(symmetricNormalizedDepth)
+ {
+ v.z = (v.z + v.w) * Float4(0.5f); // [-1, 1] -> [0, 1]
+ }
+
Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
Float4 rhw = Float4(1.0f) / w;
@@ -684,7 +673,7 @@
*Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w;
}
- void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache, const UInt &primitiveNumber, const UInt &indexInPrimitive)
+ void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache)
{
for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
@@ -696,45 +685,39 @@
*Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X));
*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
+ }
- if(state.transformFeedbackEnabled != 0)
+ void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive)
+ {
+ If(indexInPrimitive < state.verticesPerPrimitive)
{
- If(indexInPrimitive < state.verticesPerPrimitive)
+ UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive;
+
+ for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
{
- UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive;
- for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; ++i)
+ if(state.transformFeedbackEnabled & (1ULL << i))
{
- if(state.transformFeedbackEnabled & (1ULL << i))
+ UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i]));
+ UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i]));
+ UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i]));
+ UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i]));
+
+ Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float));
+ Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float);
+
+ For(UInt r = 0, r < row, r++)
{
- UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i]));
- UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i]));
- UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i]));
- UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i]));
+ UInt rOffsetX = r * col * sizeof(float);
+ UInt rOffset4 = r * sizeof(float4);
- Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float));
- Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float);
-
- For(UInt r = 0, r < row, r++)
+ For(UInt c = 0, c < col, c++)
{
- UInt rOffsetX = r * col * sizeof(float);
- UInt rOffset4 = r * sizeof(float4);
- For(UInt c = 0, c < col, c++)
- {
- UInt cOffset = c * sizeof(float);
- *Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset);
- }
+ UInt cOffset = c * sizeof(float);
+ *Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset);
}
}
}
}
-
- // Make this correction after transform feedback has been outputted
- if(symmetricNormalizedDepth && !state.fixedFunction && state.output[state.positionRegister].write)
- {
- Float z = *Pointer<Float>(vertex + OFFSET(Vertex, v[state.positionRegister]) + 2 * sizeof(float));
- Float w = *Pointer<Float>(vertex + OFFSET(Vertex, v[state.positionRegister]) + 3 * sizeof(float));
- *Pointer<Float>(vertex + OFFSET(Vertex, v[state.positionRegister]) + 2 * sizeof(float)) = (z + w) * Float(0.5f);
- }
}
}
}
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp
index eadd795..4aec45f 100644
--- a/src/Shader/VertexRoutine.hpp
+++ b/src/Shader/VertexRoutine.hpp
@@ -63,7 +63,8 @@
void computeClipFlags();
void postTransform();
void writeCache(Pointer<Byte> &cacheLine);
- void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheLine, const UInt &primitiveNumber, const UInt &indexInPrimitive);
+ void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheLine);
+ void transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive);
};
}