Rework setup, VS->FS structures, etc. for Vulkan
- Remnants of old fixed function attributes all gone
- Initial support for some builtins to prove the model
- Setup now driven by correct shader state
- VS->FS intermediate structure matches the SPIR-V model -- builtins are not
in location space; location space itself is flat scalars rather than
vec4-oriented.
Some vertex pipe features are still not supported because ES3 didn't have
them -- for example, proper handling of noperspective.
Change-Id: Ia8e3c72af54c4d1cbcc18482a741daa5e8e7c053
Bug: b/120799499
Reviewed-on: https://swiftshader-review.googlesource.com/c/24376
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index aa62cee..6cf20f0 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -31,6 +31,7 @@
#include "System/Timer.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Pipeline/SpirvShader.hpp"
+#include "Vertex.hpp"
#undef max
@@ -1236,7 +1237,6 @@
const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
int ms = state.multiSample;
- int pos = state.positionRegister;
const DrawData *data = draw.data;
int visible = 0;
@@ -1248,7 +1248,7 @@
if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
{
- Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
+ Polygon polygon(&v0.builtins.position, &v1.builtins.position, &v2.builtins.position);
int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
@@ -1332,10 +1332,8 @@
Vertex &v0 = triangle.v0;
Vertex &v1 = triangle.v1;
- int pos = state.positionRegister;
-
- const float4 &P0 = v0.v[pos];
- const float4 &P1 = v1.v[pos];
+ const float4 &P0 = v0.builtins.position;
+ const float4 &P1 = v1.builtins.position;
if(P0.w <= 0 && P1.w <= 0)
{
@@ -1525,30 +1523,17 @@
Vertex &v = triangle.v0;
- float pSize;
-
- int pts = state.pointSizeRegister;
-
- if(state.pointSizeRegister != Unused)
- {
- pSize = v.v[pts].y;
- }
- else
- {
- pSize = 1.0f;
- }
+ float pSize = v.builtins.pointSize;
pSize = clamp(pSize, data.pointSizeMin, data.pointSizeMax);
float4 P[4];
int C[4];
- int pos = state.positionRegister;
-
- P[0] = v.v[pos];
- P[1] = v.v[pos];
- P[2] = v.v[pos];
- P[3] = v.v[pos];
+ P[0] = v.builtins.position;
+ P[1] = v.builtins.position;
+ P[2] = v.builtins.position;
+ P[3] = v.builtins.position;
const float X = pSize * P[0].w * data.halfPixelX[0];
const float Y = pSize * P[0].w * data.halfPixelY[0];
@@ -1572,8 +1557,8 @@
triangle.v1 = triangle.v0;
triangle.v2 = triangle.v0;
- triangle.v1.X += iround(16 * 0.5f * pSize);
- triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
+ triangle.v1.projected.x += iround(16 * 0.5f * pSize);
+ triangle.v2.projected.y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
Polygon polygon(P, 4);
diff --git a/src/Device/SetupProcessor.cpp b/src/Device/SetupProcessor.cpp
index 800d320..4e0823b 100644
--- a/src/Device/SetupProcessor.cpp
+++ b/src/Device/SetupProcessor.cpp
@@ -87,60 +87,16 @@
state.slopeDepthBias = context->slopeDepthBias != 0.0f;
state.vFace = context->pixelShader && context->pixelShader->hasBuiltinInput(spv::BuiltInFrontFacing);
- state.positionRegister = Pos;
- state.pointSizeRegister = Unused;
-
state.multiSample = context->getMultiSampleCount();
state.rasterizerDiscard = context->rasterizerDiscard;
- //TODO: route properly
- state.positionRegister = 0;//context->vertexShader->getPositionRegister();
- state.pointSizeRegister = 1;//context->vertexShader->getPointSizeRegister();
-
- for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
+ for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
{
- for(int component = 0; component < 4; component++)
- {
- state.gradient[interpolant][component].attribute = Unused;
- state.gradient[interpolant][component].flat = false;
- state.gradient[interpolant][component].wrap = false;
- }
+ state.gradient[interpolant] = context->pixelShader->inputs[interpolant];
}
const bool point = context->isDrawPoint();
-// for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
-// {
-// for(int component = 0; component < 4; component++)
-// {
-// const Shader::Semantic& semantic = context->pixelShader->getInput(interpolant, component);
-//
-// if(semantic.active())
-// {
-// int input = interpolant;
-// for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
-// {
-// if(semantic == context->vertexShader->getOutput(i, component))
-// {
-// input = i;
-// break;
-// }
-// }
-//
-// bool flat = point;
-//
-// switch(semantic.usage)
-// {
-// case Shader::USAGE_TEXCOORD: flat = false; break;
-// case Shader::USAGE_COLOR: flat = semantic.flat || point; break;
-// }
-//
-// state.gradient[interpolant][component].attribute = input;
-// state.gradient[interpolant][component].flat = flat;
-// }
-// }
-// }
-
state.hash = state.computeHash();
return state;
diff --git a/src/Device/SetupProcessor.hpp b/src/Device/SetupProcessor.hpp
index 797c362..7001bce 100644
--- a/src/Device/SetupProcessor.hpp
+++ b/src/Device/SetupProcessor.hpp
@@ -15,6 +15,7 @@
#ifndef sw_SetupProcessor_hpp
#define sw_SetupProcessor_hpp
+#include <Pipeline/SpirvShader.hpp>
#include "Context.hpp"
#include "RoutineCache.hpp"
#include "System/Types.hpp"
@@ -41,8 +42,6 @@
bool interpolateZ : 1;
bool interpolateW : 1;
bool perspective : 1;
- unsigned int positionRegister : BITS(VERTEX_OUTPUT_LAST);
- unsigned int pointSizeRegister : BITS(VERTEX_OUTPUT_LAST);
CullMode cullMode : BITS(CULL_LAST);
bool twoSidedStencil : 1;
bool slopeDepthBias : 1;
@@ -50,14 +49,7 @@
unsigned int multiSample : 3; // 1, 2 or 4
bool rasterizerDiscard : 1;
- struct Gradient
- {
- unsigned char attribute : BITS(VERTEX_OUTPUT_LAST);
- bool flat : 1;
- bool wrap : 1;
- };
-
- Gradient gradient[MAX_FRAGMENT_INPUTS][4];
+ SpirvShader::InterfaceComponent gradient[MAX_INTERFACE_COMPONENTS];
};
struct State : States
diff --git a/src/Device/Vertex.hpp b/src/Device/Vertex.hpp
index 972bbf3..27b8b18 100644
--- a/src/Device/Vertex.hpp
+++ b/src/Device/Vertex.hpp
@@ -21,75 +21,25 @@
namespace sw
{
- enum Out
- {
- // Default vertex output semantics
- Pos = 0,
- C0 = 1, // Diffuse
- C1 = 2, // Specular
- T0 = 3,
- T1 = 4,
- T2 = 5,
- T3 = 6,
- T4 = 7,
- T5 = 8,
- T6 = 9,
- T7 = 10,
- Fog = 11, // x component
- Pts = Fog, // y component
-
- // Variable semantics
- V0 = 0,
- Vn_1 = MAX_VERTEX_OUTPUTS - 1,
-
- Unused,
- VERTEX_OUTPUT_LAST = Unused,
- };
-
- struct UVWQ
- {
- float u;
- float v;
- float w;
- float q;
-
- float &operator[](int i)
- {
- return (&u)[i];
- }
- };
-
ALIGN(16, struct Vertex
{
- union
+ float v[MAX_INTERFACE_COMPONENTS];
+
+ struct
{
- struct // Fixed semantics
- {
- // Position
- float x;
- float y;
- float z;
- float w;
-
- float4 C[2]; // Diffuse and specular color
-
- UVWQ T[8]; // Texture coordinates
-
- float f; // Fog
- float pSize; // Point size
- };
-
- float4 v[MAX_VERTEX_OUTPUTS]; // Generic components using semantic declaration
- };
-
- // Projected coordinates
- int X;
- int Y;
- float Z;
- float W;
+ float4 position;
+ float pointSize;
+ } builtins;
+ struct
+ {
+ int x;
+ int y;
+ float z;
+ float w;
+ } projected;
int clipFlags;
- int padding[3];
+ int padding[2];
});
static_assert((sizeof(Vertex) & 0x0000000F) == 0, "Vertex size not a multiple of 16 bytes (alignment requirement)");
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index d3fd5c3..80a4557 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <Device/Vertex.hpp>
#include "SetupRoutine.hpp"
#include "Constants.hpp"
@@ -54,8 +55,6 @@
const int V1 = (triangle || line) ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0);
const int V2 = triangle ? OFFSET(Triangle,v2) : (line ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0));
- int pos = state.positionRegister;
-
Pointer<Byte> v0 = tri + V0;
Pointer<Byte> v1 = tri + V1;
Pointer<Byte> v2 = tri + V2;
@@ -63,13 +62,13 @@
Array<Int> X(16);
Array<Int> Y(16);
- X[0] = *Pointer<Int>(v0 + OFFSET(Vertex,X));
- X[1] = *Pointer<Int>(v1 + OFFSET(Vertex,X));
- X[2] = *Pointer<Int>(v2 + OFFSET(Vertex,X));
+ X[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x));
+ X[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x));
+ X[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x));
- Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex,Y));
- Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex,Y));
- Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex,Y));
+ Y[0] = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y));
+ Y[1] = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y));
+ Y[2] = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y));
Int d = 1; // Winding direction
@@ -91,9 +90,9 @@
Return(false);
}
- Int w0w1w2 = *Pointer<Int>(v0 + pos * 16 + 12) ^
- *Pointer<Int>(v1 + pos * 16 + 12) ^
- *Pointer<Int>(v2 + pos * 16 + 12);
+ Int w0w1w2 = *Pointer<Int>(v0 + OFFSET(Vertex, builtins.position.w)) ^
+ *Pointer<Int>(v1 + OFFSET(Vertex, builtins.position.w)) ^
+ *Pointer<Int>(v2 + OFFSET(Vertex, builtins.position.w));
A = IfThenElse(w0w1w2 < 0, -A, A);
@@ -279,9 +278,9 @@
// Sort by minimum y
if(triangle)
{
- Float y0 = *Pointer<Float>(v0 + pos * 16 + 4);
- Float y1 = *Pointer<Float>(v1 + pos * 16 + 4);
- Float y2 = *Pointer<Float>(v2 + pos * 16 + 4);
+ Float y0 = *Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.y));
+ Float y1 = *Pointer<Float>(v1 + OFFSET(Vertex, builtins.position.y));
+ Float y2 = *Pointer<Float>(v2 + OFFSET(Vertex, builtins.position.y));
Float yMin = Min(Min(y0, y1), y2);
@@ -292,9 +291,9 @@
// Sort by maximum w
if(triangle)
{
- Float w0 = *Pointer<Float>(v0 + pos * 16 + 12);
- Float w1 = *Pointer<Float>(v1 + pos * 16 + 12);
- Float w2 = *Pointer<Float>(v2 + pos * 16 + 12);
+ Float w0 = *Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.w));
+ Float w1 = *Pointer<Float>(v1 + OFFSET(Vertex, builtins.position.w));
+ Float w2 = *Pointer<Float>(v2 + OFFSET(Vertex, builtins.position.w));
Float wMax = Max(Max(w0, w1), w2);
@@ -302,9 +301,9 @@
conditionalRotate2(wMax == w2, v0, v1, v2);
}
- Float w0 = *Pointer<Float>(v0 + pos * 16 + 12);
- Float w1 = *Pointer<Float>(v1 + pos * 16 + 12);
- Float w2 = *Pointer<Float>(v2 + pos * 16 + 12);
+ Float w0 = *Pointer<Float>(v0 + OFFSET(Vertex, builtins.position.w));
+ Float w1 = *Pointer<Float>(v1 + OFFSET(Vertex, builtins.position.w));
+ Float w2 = *Pointer<Float>(v2 + OFFSET(Vertex, builtins.position.w));
Float4 w012;
@@ -313,15 +312,15 @@
w012.z = w2;
w012.w = 1;
- Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex,W));
+ Float rhw0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.w));
- Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex,X));
- Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex,X));
- Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex,X));
+ Int X0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.x));
+ Int X1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.x));
+ Int X2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.x));
- Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex,Y));
- Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex,Y));
- Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex,Y));
+ Int Y0 = *Pointer<Int>(v0 + OFFSET(Vertex,projected.y));
+ Int Y1 = *Pointer<Int>(v1 + OFFSET(Vertex,projected.y));
+ Int Y2 = *Pointer<Int>(v2 + OFFSET(Vertex,projected.y));
if(line)
{
@@ -396,9 +395,9 @@
if(state.interpolateZ)
{
- Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex,Z));
- Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex,Z));
- Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex,Z));
+ Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex,projected.z));
+ Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex,projected.z));
+ Float z2 = *Pointer<Float>(v2 + OFFSET(Vertex,projected.z));
z1 -= z0;
z2 -= z0;
@@ -451,19 +450,16 @@
*Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) = C;
}
- for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
+ for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
{
- for(int component = 0; component < 4; component++)
- {
- int attribute = state.gradient[interpolant][component].attribute;
- bool flat = state.gradient[interpolant][component].flat;
- bool wrap = state.gradient[interpolant][component].wrap;
-
- if(attribute != Unused)
- {
- setupGradient(primitive, tri, w012, M, v0, v1, v2, OFFSET(Vertex,v[attribute][component]), OFFSET(Primitive,V[interpolant][component]), flat, point, state.perspective, wrap, component);
- }
- }
+ // TODO: fix point, perspective, etc. Not convinced various edge cases are really correct here for either VK or GL.
+ if (state.gradient[interpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED)
+ setupGradient(primitive, tri, w012, M, v0, v1, v2,
+ OFFSET(Vertex, v[interpolant]),
+ OFFSET(Primitive, V[interpolant]),
+ state.gradient[interpolant].Flat,
+ point,
+ state.perspective, 0);
}
Return(true);
@@ -472,7 +468,7 @@
routine = function("SetupRoutine");
}
- void SetupRoutine::setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flat, bool sprite, bool perspective, bool wrap, int component)
+ void SetupRoutine::setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flat, bool sprite, bool perspective, int component)
{
Float4 i;
@@ -505,21 +501,6 @@
i.w = 0;
}
- if(wrap)
- {
- Float m;
-
- m = *Pointer<Float>(v0 + attribute);
- m = Max(m, *Pointer<Float>(v1 + attribute));
- m = Max(m, *Pointer<Float>(v2 + attribute));
- m -= 0.5f;
-
- // TODO: Vectorize
- If(Float(i.x) < m) i.x = i.x + 1.0f;
- If(Float(i.y) < m) i.y = i.y + 1.0f;
- If(Float(i.z) < m) i.z = i.z + 1.0f;
- }
-
if(!perspective)
{
i *= w012;
diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp
index 977eb8f..b43dd7a 100644
--- a/src/Pipeline/SetupRoutine.hpp
+++ b/src/Pipeline/SetupRoutine.hpp
@@ -33,7 +33,7 @@
Routine *getRoutine();
private:
- void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, bool wrap, int component);
+ void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, int component);
void edge(Pointer<Byte> &primitive, Pointer<Byte> &data, const Int &Xa, const Int &Ya, const Int &Xb, const Int &Yb, Int &q);
void conditionalRotate1(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2);
void conditionalRotate2(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2);
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index 1e0e162..7bb498f 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -32,11 +32,14 @@
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
- // TODO: wire up builtins
- //if(shader->isInstanceIdDeclared())
- //{
- // instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
- //}
+ auto it = spirvShader->inputBuiltins.find(spv::BuiltInInstanceIndex);
+ if (it != spirvShader->inputBuiltins.end())
+ {
+ // TODO: we could do better here; we know InstanceIndex is uniform across all lanes
+ assert(it->second.SizeInComponents == 1);
+ (*routine.lvalues[it->second.Id])[it->second.FirstComponent] =
+ As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID)))));
+ }
}
VertexProgram::~VertexProgram()
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index 636351f..e1f0b3d 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <Device/Vertex.hpp>
#include "VertexRoutine.hpp"
#include "Constants.hpp"
@@ -94,14 +95,21 @@
void VertexRoutine::computeClipFlags()
{
- int pos = state.positionRegister;
+ auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
+ assert(it != spirvShader->outputBuiltins.end());
+ assert(it->second.SizeInComponents == 4);
+ auto &pos = (*routine.lvalues[it->second.Id]);
+ auto posX = pos[it->second.FirstComponent];
+ auto posY = pos[it->second.FirstComponent + 1];
+ auto posZ = pos[it->second.FirstComponent + 2];
+ auto posW = pos[it->second.FirstComponent + 3];
- Int4 maxX = CmpLT(o[pos].w, o[pos].x);
- Int4 maxY = CmpLT(o[pos].w, o[pos].y);
- Int4 maxZ = CmpLT(o[pos].w, o[pos].z);
- Int4 minX = CmpNLE(-o[pos].w, o[pos].x);
- Int4 minY = CmpNLE(-o[pos].w, o[pos].y);
- Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z);
+ Int4 maxX = CmpLT(posW, posX);
+ Int4 maxY = CmpLT(posW, posY);
+ Int4 maxZ = CmpLT(posW, posZ);
+ Int4 minX = CmpNLE(-posW, posX);
+ Int4 minY = CmpNLE(-posW, posY);
+ Int4 minZ = CmpNLE(Float4(0.0f), posZ);
clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4);
@@ -110,9 +118,9 @@
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4);
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4);
- Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+ Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+ Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+ Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteXYZ = finiteX & finiteY & finiteZ;
clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4);
@@ -658,12 +666,28 @@
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
// Viewport transform
- int pos = state.positionRegister;
+ auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
+ assert(it != spirvShader->outputBuiltins.end());
+ assert(it->second.SizeInComponents == 4);
+ auto &pos = (*routine.lvalues[it->second.Id]);
+ auto posX = pos[it->second.FirstComponent];
+ auto posY = pos[it->second.FirstComponent + 1];
+ auto posZ = pos[it->second.FirstComponent + 2];
+ auto posW = pos[it->second.FirstComponent + 3];
- v.x = o[pos].x;
- v.y = o[pos].y;
- v.z = o[pos].z;
- v.w = o[pos].w;
+ v.x = posX;
+ v.y = posY;
+ v.z = posZ;
+ v.w = posW;
+
+ // Write the builtin pos into the vertex; it's not going to be consumed by the FS, but may need to reproject if we have to clip.
+ Vector4f v2 = v;
+ transpose4x4(v2.x, v2.y, v2.z, v2.w);
+
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 0, 16) = v2.x;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 1, 16) = v2.y;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 2, 16) = v2.z;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 3, 16) = v2.w;
Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
Float4 rhw = Float4(1.0f) / w;
@@ -675,23 +699,26 @@
transpose4x4(v.x, v.y, v.z, v.w);
- *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x;
- *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y;
- *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z;
- *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 0, 16) = v.x;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 1, 16) = v.y;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 2, 16) = v.z;
+ *Pointer<Float4>(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 3, 16) = v.w;
}
void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache)
{
- for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
+ for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
{
- if(state.output[i].write)
+ if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{
- *Pointer<Int4>(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer<Int4>(cache + OFFSET(Vertex,v[i]), 16);
+ *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cache + OFFSET(Vertex, v[i]), 4);
}
}
- *Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X));
+ *Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cache + OFFSET(Vertex,projected));
*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
+ *Pointer<Int4>(vertex + OFFSET(Vertex,builtins.position)) = *Pointer<Int4>(cache + OFFSET(Vertex,builtins.position));
+ *Pointer<Int>(vertex + OFFSET(Vertex,builtins.pointSize)) = *Pointer<Int>(cache + OFFSET(Vertex,builtins.pointSize));
+
}
}