Make the number of vertex outputs configurable.
Change-Id: I17ae53e5274232e9e3b482daac56d507788e822c
Reviewed-on: https://swiftshader-review.googlesource.com/5383
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/D3D9/Capabilities.hpp b/src/D3D9/Capabilities.hpp
index 321160b..1421c80 100644
--- a/src/D3D9/Capabilities.hpp
+++ b/src/D3D9/Capabilities.hpp
@@ -15,6 +15,9 @@
#ifndef D3D9_Capabilities_hpp
#define D3D9_Capabilities_hpp
+#include "Config.hpp"
+#include "MetaMacro.hpp"
+
#include <d3d9.h>
namespace D3D9
@@ -465,6 +468,23 @@
extern unsigned int textureMemory;
extern unsigned int maxAnisotropy;
+
+ enum   // Limits declared in namespace D3D9; each one is checked by the META_ASSERTs that follow
+ {
+ MAX_VERTEX_SHADER_CONST = 256,   // Asserted below: >= 256 (Shader Model 3.0) and <= sw::VERTEX_UNIFORM_VECTORS
+ MAX_PIXEL_SHADER_CONST = 224,   // Asserted below: == 224 (Shader Model 3.0) and <= sw::FRAGMENT_UNIFORM_VECTORS
+ MAX_VERTEX_OUTPUTS = 12,   // Asserted below: == 12 (Shader Model 3.0) and <= sw::MAX_VERTEX_OUTPUTS
+ };
+
+ // Shader Model 3.0 requirements
+ META_ASSERT(MAX_VERTEX_SHADER_CONST >= 256);
+ META_ASSERT(MAX_PIXEL_SHADER_CONST == 224);
+ META_ASSERT(MAX_VERTEX_OUTPUTS == 12);
+
+ // Back-end minimum requirements
+ META_ASSERT(sw::VERTEX_UNIFORM_VECTORS >= MAX_VERTEX_SHADER_CONST);
+ META_ASSERT(sw::FRAGMENT_UNIFORM_VECTORS >= MAX_PIXEL_SHADER_CONST);
+ META_ASSERT(sw::MAX_VERTEX_OUTPUTS >= MAX_VERTEX_OUTPUTS);
}
#endif // D3D9_Capabilities_hpp
diff --git a/src/D3D9/Direct3DDevice9.cpp b/src/D3D9/Direct3DDevice9.cpp
index 60f9173..3414a4a 100644
--- a/src/D3D9/Direct3DDevice9.cpp
+++ b/src/D3D9/Direct3DDevice9.cpp
@@ -5800,7 +5800,7 @@
}
else // Bind directly to the output
{
- for(int i = 0; i < 12; i++)
+ for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
if((usage == shader->output[i][0].usage || (usage == D3DDECLUSAGE_POSITIONT && shader->output[i][0].usage == D3DDECLUSAGE_POSITION)) &&
index == shader->output[i][0].index)
diff --git a/src/D3D9/Direct3DDevice9.hpp b/src/D3D9/Direct3DDevice9.hpp
index cf9c7d6..da78c9b 100644
--- a/src/D3D9/Direct3DDevice9.hpp
+++ b/src/D3D9/Direct3DDevice9.hpp
@@ -19,6 +19,7 @@
#include "Direct3D9.hpp"
#include "Direct3DSwapChain9.hpp"
+#include "Capabilities.hpp"
#include "Stream.hpp"
@@ -45,12 +46,6 @@
class Direct3DIndexBuffer9;
class CriticalSection;
- enum
- {
- MAX_VERTEX_SHADER_CONST = MIN(256, sw::VERTEX_UNIFORM_VECTORS),
- MAX_PIXEL_SHADER_CONST = MIN(224, sw::FRAGMENT_UNIFORM_VECTORS),
- };
-
class Direct3DDevice9 : public IDirect3DDevice9, public Unknown
{
friend CriticalSection;
diff --git a/src/Main/Config.hpp b/src/Main/Config.hpp
index 7150750..7411080 100644
--- a/src/Main/Config.hpp
+++ b/src/Main/Config.hpp
@@ -87,6 +87,7 @@
TOTAL_IMAGE_UNITS = TEXTURE_IMAGE_UNITS + VERTEX_TEXTURE_IMAGE_UNITS,
FRAGMENT_UNIFORM_VECTORS = 224,
VERTEX_UNIFORM_VECTORS = 256,
+ MAX_VERTEX_OUTPUTS = 12,
MAX_FRAGMENT_UNIFORM_BLOCKS = 12,
MAX_VERTEX_UNIFORM_BLOCKS = 12,
MAX_UNIFORM_BUFFER_BINDINGS = MAX_FRAGMENT_UNIFORM_BLOCKS + MAX_VERTEX_UNIFORM_BLOCKS, // Limited to 127 by SourceParameter.bufferIndex in Shader.hpp
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index a27b30e..b427b26 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -2787,7 +2787,7 @@
}
else if(vertexShader)
{
- if((var + registerCount) > sw::VertexShader::MAX_OUTPUT_VARYINGS)
+ if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
{
mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
return 0;
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index 40d9de6..712d74a 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -400,7 +400,7 @@
draw->pUniformBuffers[i] = nullptr;
}
}
-
+
if(context->pixelShaderVersion() <= 0x0104)
{
for(int stage = 0; stage < 8; stage++)
@@ -736,7 +736,7 @@
}
}
}
-
+
// Find primitive tasks
if(currentDraw == nextDraw)
{
@@ -837,7 +837,7 @@
case Task::PRIMITIVES:
{
int unit = task[threadIndex].primitiveUnit;
-
+
int input = primitiveProgress[unit].firstPrimitive;
int count = primitiveProgress[unit].primitiveCount;
DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
@@ -1571,7 +1571,7 @@
return visible;
}
-
+
int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
{
Triangle *triangle = renderer->triangleBatch[unit];
@@ -1878,7 +1878,7 @@
int pts = state.pointSizeRegister;
- if(state.pointSizeRegister != 0xF)
+ if(state.pointSizeRegister != Unused)
{
pSize = v.v[pts].y;
}
@@ -1937,7 +1937,7 @@
return false;
}
}
-
+
return setupRoutine(&primitive, &triangle, &polygon, &data);
}
@@ -2002,7 +2002,7 @@
exitThreads = true;
resume[thread]->signal();
worker[thread]->join();
-
+
delete worker[thread];
worker[thread] = 0;
delete resume[thread];
@@ -2010,7 +2010,7 @@
delete suspend[thread];
suspend[thread] = 0;
}
-
+
deallocate(vertexTask[thread]);
vertexTask[thread] = 0;
}
@@ -2137,7 +2137,7 @@
return true;
}
}
-
+
if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
{
return true;
@@ -2145,7 +2145,7 @@
return false;
}
-
+
void Renderer::updateClipper()
{
if(updateClipPlanes)
@@ -2185,7 +2185,7 @@
void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
{
ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
-
+
context->sampler[sampler].setTextureLevel(face, level, surface, type);
}
@@ -2530,7 +2530,7 @@
{
queries.push_back(query);
}
-
+
void Renderer::removeQuery(Query *query)
{
queries.remove(query);
@@ -2541,7 +2541,7 @@
{
return threadCount;
}
-
+
int64_t Renderer::getVertexTime(int thread)
{
return vertexTime[thread];
@@ -2551,7 +2551,7 @@
{
return setupTime[thread];
}
-
+
int64_t Renderer::getPixelTime(int thread)
{
return pixelTime[thread];
diff --git a/src/Renderer/SetupProcessor.cpp b/src/Renderer/SetupProcessor.cpp
index 08e52c9..6dccc8d 100644
--- a/src/Renderer/SetupProcessor.cpp
+++ b/src/Renderer/SetupProcessor.cpp
@@ -89,7 +89,7 @@
state.vFace = context->pixelShader && context->pixelShader->vFaceDeclared;
state.positionRegister = Pos;
- state.pointSizeRegister = 0xF; // No vertex point size
+ state.pointSizeRegister = Unused;
state.multiSample = context->getMultiSampleCount();
state.rasterizerDiscard = context->rasterizerDiscard;
@@ -133,7 +133,7 @@
if(context->pixelShader->semantic[interpolant][component - project].active())
{
int input = interpolant;
- for(int i = 0; i < 12; i++)
+ for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
if(context->pixelShader->semantic[interpolant][component - project] == context->vertexShader->output[i][component - project])
{
diff --git a/src/Renderer/SetupProcessor.hpp b/src/Renderer/SetupProcessor.hpp
index 06e4ce9..2ef9fdf 100644
--- a/src/Renderer/SetupProcessor.hpp
+++ b/src/Renderer/SetupProcessor.hpp
@@ -45,8 +45,8 @@
bool interpolateW : 1;
bool perspective : 1;
bool pointSprite : 1;
- unsigned int positionRegister : 4;
- unsigned int pointSizeRegister : 4;
+ unsigned int positionRegister : BITS(VERTEX_OUTPUT_LAST);
+ unsigned int pointSizeRegister : BITS(VERTEX_OUTPUT_LAST);
CullMode cullMode : BITS(CULL_LAST);
bool twoSidedStencil : 1;
bool slopeDepthBias : 1;
@@ -56,7 +56,7 @@
struct Gradient
{
- unsigned char attribute : BITS(Unused);
+ unsigned char attribute : BITS(VERTEX_OUTPUT_LAST);
bool flat : 1;
bool wrap : 1;
};
diff --git a/src/Renderer/Vertex.hpp b/src/Renderer/Vertex.hpp
index 5c9e504..78bcf14 100644
--- a/src/Renderer/Vertex.hpp
+++ b/src/Renderer/Vertex.hpp
@@ -18,11 +18,13 @@
#include "Color.hpp"
#include "Common/MetaMacro.hpp"
#include "Common/Types.hpp"
+#include "Main/Config.hpp"
namespace sw
{
- enum Out // Default vertex output semantic
+ enum Out
{
+ // Default vertex output semantics
Pos = 0,
C0 = 1, // Diffuse
C1 = 2, // Specular
@@ -36,7 +38,13 @@
T7 = 10,
Fog = 11, // x component
Pts = Fog, // y component
- Unused
+
+ // Variable semantics
+ V0 = 0,
+ Vn_1 = MAX_VERTEX_OUTPUTS - 1,
+
+ Unused,
+ VERTEX_OUTPUT_LAST = Unused,
};
struct UVWQ
@@ -72,7 +80,7 @@
float pSize; // Point size
};
- float4 v[12]; // Generic components using semantic declaration
+ float4 v[MAX_VERTEX_OUTPUTS]; // Generic components using semantic declaration
};
// Projected coordinates
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index fa13b2b..291e532 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -942,7 +942,7 @@
if(context->vertexShader) // FIXME: Also when pre-transformed?
{
- for(int i = 0; i < 12; i++)
+ for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
state.output[i].xWrite = context->vertexShader->output[i][0].active();
state.output[i].yWrite = context->vertexShader->output[i][1].active();
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index 14758bb..bcbf4fd 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -52,8 +52,8 @@
bool fixedFunction : 1;
bool textureSampling : 1;
- unsigned int positionRegister : 4;
- unsigned int pointSizeRegister : 4; // 0xF signifies no vertex point size
+ unsigned int positionRegister : BITS(MAX_VERTEX_OUTPUTS);
+ unsigned int pointSizeRegister : BITS(MAX_VERTEX_OUTPUTS);
unsigned int vertexBlendMatrixCount : 3;
bool indexedVertexBlendEnable : 1;
@@ -134,7 +134,7 @@
};
Input input[VERTEX_ATTRIBUTES];
- Output output[12];
+ Output output[MAX_VERTEX_OUTPUTS];
};
struct State : States
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index 36753bd..630fbf4 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -144,7 +144,7 @@
If(m != 0 || Bool(!solidTriangle)) // Clipped triangle; reproject
{
Pointer<Byte> V = polygon + OFFSET(Polygon,P) + m * sizeof(void*) * 16;
-
+
Int i = 0;
Do
@@ -166,9 +166,9 @@
// Vertical range
Int yMin = Y[0];
Int yMax = Y[0];
-
+
Int i = 1;
-
+
Do
{
yMin = Min(Y[i], yMin);
@@ -196,7 +196,7 @@
yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
-
+
For(Int q = 0, q < state.multiSample, q++)
{
Array<Int> Xq(16);
@@ -510,7 +510,7 @@
if(component == 1) i.z = 1.0f;
if(component == 2) i.z = 0.0f;
if(component == 3) i.z = 1.0f;
-
+
i.w = 0;
}
@@ -597,7 +597,7 @@
Int ceil = -d >> 31; // Ceiling division: remainder <= 0
x -= ceil;
d -= ceil & FDY12;
-
+
Int Q = FDX12 / FDY12; // Edge-step
Int R = FDX12 % FDY12; // Error-step
Int floor = R >> 31; // Flooring division: remainder >= 0
@@ -615,7 +615,7 @@
d += R;
Int overflow = -d >> 31;
-
+
d -= D & overflow;
x -= overflow;
@@ -632,7 +632,7 @@
If(condition)
{
Pointer<Byte> vX;
-
+
vX = v0;
v0 = v1;
v1 = v2;
@@ -652,7 +652,7 @@
If(condition)
{
Pointer<Byte> vX;
-
+
vX = v2;
v2 = v1;
v1 = v0;
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index 46858c9..deb6180 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -119,7 +119,7 @@
predicate = false;
predicateNot = false;
predicateSwizzle = 0xE4;
-
+
coissue = false;
samplerType = SAMPLER_UNKNOWN;
usage = USAGE_POSITION;
@@ -162,7 +162,7 @@
token++;
size--;
}
-
+
token++;
size--;
}
@@ -173,7 +173,7 @@
predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
-
+
token++;
size--;
}
@@ -201,11 +201,11 @@
std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
{
std::string instructionString;
-
+
if(opcode != OPCODE_DCL)
{
instructionString += coissue ? "+ " : "";
-
+
if(predicate)
{
instructionString += predicateNot ? "(!p0" : "(p0";
@@ -219,7 +219,7 @@
{
instructionString += " " + dst.string(shaderType, version) +
dst.relativeString() +
- dst.maskString();
+ dst.maskString();
}
for(int i = 0; i < 4; i++)
@@ -229,8 +229,8 @@
instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
instructionString += src[i].preModifierString() +
src[i].string(shaderType, version) +
- src[i].relativeString() +
- src[i].postModifierString() +
+ src[i].relativeString() +
+ src[i].postModifierString() +
src[i].swizzleString();
}
}
@@ -351,10 +351,10 @@
{
case 0: return "";
case 1: return "_x2";
- case 2: return "_x4";
+ case 2: return "_x4";
case 3: return "_x8";
case -1: return "_d2";
- case -2: return "_d4";
+ case -2: return "_d4";
case -3: return "_d8";
default:
return "";
@@ -630,7 +630,7 @@
src[i].rel.type = PARAMETER_VOID;
src[i].rel.swizzle = 0x00;
src[i].rel.scale = 1;
-
+
switch(opcode)
{
case OPCODE_DEF:
@@ -1018,7 +1018,7 @@
else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
{
buffer << index;
-
+
return typeString(shaderType, version) + buffer.str();
}
else
@@ -1079,7 +1079,7 @@
{
return opcode == OPCODE_IF || opcode == OPCODE_IFC;
}
-
+
bool Shader::Instruction::isCall() const
{
return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
@@ -1160,7 +1160,7 @@
token += 1 + tokenCount;
}
}
-
+
int Shader::size(unsigned long opcode) const
{
return size(opcode, version);
@@ -1633,7 +1633,7 @@
{
containsLeave = true;
}
-
+
if(instruction[i]->isBreak())
{
containsBreak = true;
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 26608f4..24a6693 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -594,7 +594,7 @@
{
if(shader)
{
- for(int i = 0; i < 12; i++)
+ for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
unsigned char usage = shader->output[i][0].usage;
@@ -1263,7 +1263,7 @@
loopRepDepth--;
llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
-
+
Nucleus::createBr(loopRepEndBlock[loopRepDepth]);
Nucleus::setInsertBlock(endBlock);
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 0eced49..1affb45 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -580,7 +580,7 @@
{
Vector4f v;
- for(int i = 0; i < 12; i++)
+ for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
if(state.output[i].write)
{
@@ -669,7 +669,7 @@
void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache)
{
- for(int i = 0; i < 12; i++)
+ for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
{
if(state.output[i].write)
{
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp
index 71aeb5b..d97d8ed 100644
--- a/src/Shader/VertexRoutine.hpp
+++ b/src/Shader/VertexRoutine.hpp
@@ -49,7 +49,7 @@
Int clipFlags;
RegisterArray<16> v; // Varying registers
- RegisterArray<12> o; // Output registers
+ RegisterArray<MAX_VERTEX_OUTPUTS> o; // Output registers
const VertexProcessor::State &state;
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp
index cfbda0c..c488630 100644
--- a/src/Shader/VertexShader.cpp
+++ b/src/Shader/VertexShader.cpp
@@ -25,7 +25,7 @@
{
version = 0x0300;
positionRegister = Pos;
- pointSizeRegister = -1; // No vertex point size
+ pointSizeRegister = Unused;
instanceIdDeclared = false;
for(int i = 0; i < MAX_INPUT_ATTRIBUTES; i++)
@@ -57,7 +57,7 @@
parse(token);
positionRegister = Pos;
- pointSizeRegister = -1; // No vertex point size
+ pointSizeRegister = Unused;
instanceIdDeclared = false;
for(int i = 0; i < MAX_INPUT_ATTRIBUTES; i++)
diff --git a/src/Shader/VertexShader.hpp b/src/Shader/VertexShader.hpp
index f8760fb..2a76cd2 100644
--- a/src/Shader/VertexShader.hpp
+++ b/src/Shader/VertexShader.hpp
@@ -16,6 +16,7 @@
#define sw_VertexShader_hpp
#include "Shader.hpp"
+#include "Main/Config.hpp"
namespace sw
{
@@ -40,8 +41,7 @@
enum {MAX_INPUT_ATTRIBUTES = 16};
Semantic input[MAX_INPUT_ATTRIBUTES]; // FIXME: Private
- enum {MAX_OUTPUT_VARYINGS = 12};
- Semantic output[MAX_OUTPUT_VARYINGS][4]; // FIXME: Private
+ Semantic output[MAX_VERTEX_OUTPUTS][4]; // FIXME: Private
private:
void analyzeInput();