Make the number of vertex outputs configurable.

Change-Id: I17ae53e5274232e9e3b482daac56d507788e822c
Reviewed-on: https://swiftshader-review.googlesource.com/5383
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/D3D9/Capabilities.hpp b/src/D3D9/Capabilities.hpp
index 321160b..1421c80 100644
--- a/src/D3D9/Capabilities.hpp
+++ b/src/D3D9/Capabilities.hpp
@@ -15,6 +15,9 @@
 #ifndef D3D9_Capabilities_hpp
 #define D3D9_Capabilities_hpp
 
+#include "Config.hpp"
+#include "MetaMacro.hpp"
+
 #include <d3d9.h>
 
 namespace D3D9
@@ -465,6 +468,23 @@
 
 	extern unsigned int textureMemory;
 	extern unsigned int maxAnisotropy;
+
+	enum   // Shader resource limits of the D3D9 front-end; each value is validated by the META_ASSERT checks that follow
+	{
+		MAX_VERTEX_SHADER_CONST = 256,   // Checked to be >= 256 and not to exceed sw::VERTEX_UNIFORM_VECTORS
+		MAX_PIXEL_SHADER_CONST = 224,   // Checked to be exactly 224 and not to exceed sw::FRAGMENT_UNIFORM_VECTORS
+		MAX_VERTEX_OUTPUTS = 12,   // Checked to be exactly 12 and not to exceed sw::MAX_VERTEX_OUTPUTS
+	};
+
+	// Shader Model 3.0 requirements: the limits declared by this front-end must have these values
+	META_ASSERT(MAX_VERTEX_SHADER_CONST >= 256);
+	META_ASSERT(MAX_PIXEL_SHADER_CONST == 224);
+	META_ASSERT(MAX_VERTEX_OUTPUTS == 12);
+
+	// Back-end minimum requirements: the sw:: limits must be at least as large as the D3D9 limits above
+	META_ASSERT(sw::VERTEX_UNIFORM_VECTORS >= MAX_VERTEX_SHADER_CONST);
+	META_ASSERT(sw::FRAGMENT_UNIFORM_VECTORS >= MAX_PIXEL_SHADER_CONST);
+	META_ASSERT(sw::MAX_VERTEX_OUTPUTS >= MAX_VERTEX_OUTPUTS);
 }
 
 #endif   // D3D9_Capabilities_hpp
diff --git a/src/D3D9/Direct3DDevice9.cpp b/src/D3D9/Direct3DDevice9.cpp
index 60f9173..3414a4a 100644
--- a/src/D3D9/Direct3DDevice9.cpp
+++ b/src/D3D9/Direct3DDevice9.cpp
@@ -5800,7 +5800,7 @@
 				}
 				else   // Bind directly to the output
 				{
-					for(int i = 0; i < 12; i++)
+					for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 					{
 						if((usage == shader->output[i][0].usage || (usage == D3DDECLUSAGE_POSITIONT && shader->output[i][0].usage == D3DDECLUSAGE_POSITION)) &&
 						    index == shader->output[i][0].index)
diff --git a/src/D3D9/Direct3DDevice9.hpp b/src/D3D9/Direct3DDevice9.hpp
index cf9c7d6..da78c9b 100644
--- a/src/D3D9/Direct3DDevice9.hpp
+++ b/src/D3D9/Direct3DDevice9.hpp
@@ -19,6 +19,7 @@
 
 #include "Direct3D9.hpp"
 #include "Direct3DSwapChain9.hpp"
+#include "Capabilities.hpp"
 
 #include "Stream.hpp"
 
@@ -45,12 +46,6 @@
 	class Direct3DIndexBuffer9;
 	class CriticalSection;
 
-	enum
-	{
-		MAX_VERTEX_SHADER_CONST = MIN(256, sw::VERTEX_UNIFORM_VECTORS),
-		MAX_PIXEL_SHADER_CONST = MIN(224, sw::FRAGMENT_UNIFORM_VECTORS),
-	};
-
 	class Direct3DDevice9 : public IDirect3DDevice9, public Unknown
 	{
 		friend CriticalSection;
diff --git a/src/Main/Config.hpp b/src/Main/Config.hpp
index 7150750..7411080 100644
--- a/src/Main/Config.hpp
+++ b/src/Main/Config.hpp
@@ -87,6 +87,7 @@
 		TOTAL_IMAGE_UNITS = TEXTURE_IMAGE_UNITS + VERTEX_TEXTURE_IMAGE_UNITS,
 		FRAGMENT_UNIFORM_VECTORS = 224,
 		VERTEX_UNIFORM_VECTORS = 256,
+		MAX_VERTEX_OUTPUTS = 12,
 		MAX_FRAGMENT_UNIFORM_BLOCKS = 12,
 		MAX_VERTEX_UNIFORM_BLOCKS = 12,
 		MAX_UNIFORM_BUFFER_BINDINGS = MAX_FRAGMENT_UNIFORM_BLOCKS + MAX_VERTEX_UNIFORM_BLOCKS,   // Limited to 127 by SourceParameter.bufferIndex in Shader.hpp
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index a27b30e..b427b26 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -2787,7 +2787,7 @@
 			}
 			else if(vertexShader)
 			{
-				if((var + registerCount) > sw::VertexShader::MAX_OUTPUT_VARYINGS)
+				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
 				{
 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
 					return 0;
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index 40d9de6..712d74a 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -400,7 +400,7 @@
 					draw->pUniformBuffers[i] = nullptr;
 				}
 			}
-			
+
 			if(context->pixelShaderVersion() <= 0x0104)
 			{
 				for(int stage = 0; stage < 8; stage++)
@@ -736,7 +736,7 @@
 				}
 			}
 		}
-	
+
 		// Find primitive tasks
 		if(currentDraw == nextDraw)
 		{
@@ -837,7 +837,7 @@
 		case Task::PRIMITIVES:
 			{
 				int unit = task[threadIndex].primitiveUnit;
-			
+
 				int input = primitiveProgress[unit].firstPrimitive;
 				int count = primitiveProgress[unit].primitiveCount;
 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
@@ -1571,7 +1571,7 @@
 
 		return visible;
 	}
-	
+
 	int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
 	{
 		Triangle *triangle = renderer->triangleBatch[unit];
@@ -1878,7 +1878,7 @@
 
 		int pts = state.pointSizeRegister;
 
-		if(state.pointSizeRegister != 0xF)
+		if(state.pointSizeRegister != Unused)
 		{
 			pSize = v.v[pts].y;
 		}
@@ -1937,7 +1937,7 @@
 					return false;
 				}
 			}
-			
+
 			return setupRoutine(&primitive, &triangle, &polygon, &data);
 		}
 
@@ -2002,7 +2002,7 @@
 				exitThreads = true;
 				resume[thread]->signal();
 				worker[thread]->join();
-				
+
 				delete worker[thread];
 				worker[thread] = 0;
 				delete resume[thread];
@@ -2010,7 +2010,7 @@
 				delete suspend[thread];
 				suspend[thread] = 0;
 			}
-		
+
 			deallocate(vertexTask[thread]);
 			vertexTask[thread] = 0;
 		}
@@ -2137,7 +2137,7 @@
 				return true;
 			}
 		}
-	
+
 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
 		{
 			return true;
@@ -2145,7 +2145,7 @@
 
 		return false;
 	}
-	
+
 	void Renderer::updateClipper()
 	{
 		if(updateClipPlanes)
@@ -2185,7 +2185,7 @@
 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
 	{
 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
-		
+
 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
 	}
 
@@ -2530,7 +2530,7 @@
 	{
 		queries.push_back(query);
 	}
-	
+
 	void Renderer::removeQuery(Query *query)
 	{
 		queries.remove(query);
@@ -2541,7 +2541,7 @@
 		{
 			return threadCount;
 		}
-		
+
 		int64_t Renderer::getVertexTime(int thread)
 		{
 			return vertexTime[thread];
@@ -2551,7 +2551,7 @@
 		{
 			return setupTime[thread];
 		}
-			
+
 		int64_t Renderer::getPixelTime(int thread)
 		{
 			return pixelTime[thread];
diff --git a/src/Renderer/SetupProcessor.cpp b/src/Renderer/SetupProcessor.cpp
index 08e52c9..6dccc8d 100644
--- a/src/Renderer/SetupProcessor.cpp
+++ b/src/Renderer/SetupProcessor.cpp
@@ -89,7 +89,7 @@
 		state.vFace = context->pixelShader && context->pixelShader->vFaceDeclared;
 
 		state.positionRegister = Pos;
-		state.pointSizeRegister = 0xF;   // No vertex point size
+		state.pointSizeRegister = Unused;
 
 		state.multiSample = context->getMultiSampleCount();
 		state.rasterizerDiscard = context->rasterizerDiscard;
@@ -133,7 +133,7 @@
 					if(context->pixelShader->semantic[interpolant][component - project].active())
 					{
 						int input = interpolant;
-						for(int i = 0; i < 12; i++)
+						for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 						{
 							if(context->pixelShader->semantic[interpolant][component - project] == context->vertexShader->output[i][component - project])
 							{
diff --git a/src/Renderer/SetupProcessor.hpp b/src/Renderer/SetupProcessor.hpp
index 06e4ce9..2ef9fdf 100644
--- a/src/Renderer/SetupProcessor.hpp
+++ b/src/Renderer/SetupProcessor.hpp
@@ -45,8 +45,8 @@
 			bool interpolateW              : 1;
 			bool perspective               : 1;
 			bool pointSprite               : 1;
-			unsigned int positionRegister  : 4;
-			unsigned int pointSizeRegister : 4;
+			unsigned int positionRegister  : BITS(VERTEX_OUTPUT_LAST);
+			unsigned int pointSizeRegister : BITS(VERTEX_OUTPUT_LAST);
 			CullMode cullMode              : BITS(CULL_LAST);
 			bool twoSidedStencil           : 1;
 			bool slopeDepthBias            : 1;
@@ -56,7 +56,7 @@
 
 			struct Gradient
 			{
-				unsigned char attribute : BITS(Unused);
+				unsigned char attribute : BITS(VERTEX_OUTPUT_LAST);
 				bool flat               : 1;
 				bool wrap               : 1;
 			};
diff --git a/src/Renderer/Vertex.hpp b/src/Renderer/Vertex.hpp
index 5c9e504..78bcf14 100644
--- a/src/Renderer/Vertex.hpp
+++ b/src/Renderer/Vertex.hpp
@@ -18,11 +18,13 @@
 #include "Color.hpp"
 #include "Common/MetaMacro.hpp"
 #include "Common/Types.hpp"
+#include "Main/Config.hpp"
 
 namespace sw
 {
-	enum Out   // Default vertex output semantic
+	enum Out
 	{
+		// Default vertex output semantics
 		Pos = 0,
 		C0 = 1,   // Diffuse
 		C1 = 2,   // Specular
@@ -36,7 +38,13 @@
 		T7 = 10,
 		Fog = 11,    // x component
 		Pts = Fog,   // y component
-		Unused
+
+		// Variable semantics
+		V0 = 0,
+		Vn_1 = MAX_VERTEX_OUTPUTS - 1,
+
+		Unused,
+		VERTEX_OUTPUT_LAST = Unused,
 	};
 
 	struct UVWQ
@@ -72,7 +80,7 @@
 				float pSize;         // Point size
 			};
 
-			float4 v[12];   // Generic components using semantic declaration
+			float4 v[MAX_VERTEX_OUTPUTS];   // Generic components using semantic declaration
 		};
 
 		// Projected coordinates
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index fa13b2b..291e532 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -942,7 +942,7 @@
 
 		if(context->vertexShader)   // FIXME: Also when pre-transformed?
 		{
-			for(int i = 0; i < 12; i++)
+			for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 			{
 				state.output[i].xWrite = context->vertexShader->output[i][0].active();
 				state.output[i].yWrite = context->vertexShader->output[i][1].active();
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index 14758bb..bcbf4fd 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -52,8 +52,8 @@
 
 			bool fixedFunction             : 1;
 			bool textureSampling           : 1;
-			unsigned int positionRegister  : 4;
-			unsigned int pointSizeRegister : 4;   // 0xF signifies no vertex point size
+			unsigned int positionRegister  : BITS(MAX_VERTEX_OUTPUTS);
+			unsigned int pointSizeRegister : BITS(MAX_VERTEX_OUTPUTS);
 
 			unsigned int vertexBlendMatrixCount               : 3;
 			bool indexedVertexBlendEnable                     : 1;
@@ -134,7 +134,7 @@
 			};
 
 			Input input[VERTEX_ATTRIBUTES];
-			Output output[12];
+			Output output[MAX_VERTEX_OUTPUTS];
 		};
 
 		struct State : States
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index 36753bd..630fbf4 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -144,7 +144,7 @@
 			If(m != 0 || Bool(!solidTriangle))   // Clipped triangle; reproject
 			{
 				Pointer<Byte> V = polygon + OFFSET(Polygon,P) + m * sizeof(void*) * 16;
-		
+
 				Int i = 0;
 
 				Do
@@ -166,9 +166,9 @@
 			// Vertical range
 			Int yMin = Y[0];
 			Int yMax = Y[0];
-		
+
 			Int i = 1;
-			
+
 			Do
 			{
 				yMin = Min(Y[i], yMin);
@@ -196,7 +196,7 @@
 
 			yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0)));
 			yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData,scissorY1)));
-		
+
 			For(Int q = 0, q < state.multiSample, q++)
 			{
 				Array<Int> Xq(16);
@@ -510,7 +510,7 @@
 				if(component == 1) i.z = 1.0f;
 				if(component == 2) i.z = 0.0f;
 				if(component == 3) i.z = 1.0f;
-				
+
 				i.w = 0;
 			}
 
@@ -597,7 +597,7 @@
 				Int ceil = -d >> 31;   // Ceiling division: remainder <= 0
 				x -= ceil;
 				d -= ceil & FDY12;
-				
+
 				Int Q = FDX12 / FDY12;   // Edge-step
 				Int R = FDX12 % FDY12;   // Error-step
 				Int floor = R >> 31;     // Flooring division: remainder >= 0
@@ -615,7 +615,7 @@
 					d += R;
 
 					Int overflow = -d >> 31;
-			
+
 					d -= D & overflow;
 					x -= overflow;
 
@@ -632,7 +632,7 @@
 			If(condition)
 			{
 				Pointer<Byte> vX;
-			
+
 				vX = v0;
 				v0 = v1;
 				v1 = v2;
@@ -652,7 +652,7 @@
 			If(condition)
 			{
 				Pointer<Byte> vX;
-			
+
 				vX = v2;
 				v2 = v1;
 				v1 = v0;
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index 46858c9..deb6180 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -119,7 +119,7 @@
 		predicate = false;
 		predicateNot = false;
 		predicateSwizzle = 0xE4;
-		
+
 		coissue = false;
 		samplerType = SAMPLER_UNKNOWN;
 		usage = USAGE_POSITION;
@@ -162,7 +162,7 @@
 					token++;
 					size--;
 				}
-				
+
 				token++;
 				size--;
 			}
@@ -173,7 +173,7 @@
 
 				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
 				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
-				
+
 				token++;
 				size--;
 			}
@@ -201,11 +201,11 @@
 	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
 	{
 		std::string instructionString;
-		
+
 		if(opcode != OPCODE_DCL)
 		{
 			instructionString += coissue ? "+ " : "";
-			
+
 			if(predicate)
 			{
 				instructionString += predicateNot ? "(!p0" : "(p0";
@@ -219,7 +219,7 @@
 			{
 				instructionString += " " + dst.string(shaderType, version) +
 				                           dst.relativeString() +
-				                           dst.maskString(); 
+				                           dst.maskString();
 			}
 
 			for(int i = 0; i < 4; i++)
@@ -229,8 +229,8 @@
 					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
 					instructionString += src[i].preModifierString() +
 										 src[i].string(shaderType, version) +
-										 src[i].relativeString() + 
-										 src[i].postModifierString() + 
+										 src[i].relativeString() +
+										 src[i].postModifierString() +
 										 src[i].swizzleString();
 				}
 			}
@@ -351,10 +351,10 @@
 		{
 		case 0:		return "";
 		case 1:		return "_x2";
-		case 2:		return "_x4"; 
+		case 2:		return "_x4";
 		case 3:		return "_x8";
 		case -1:	return "_d2";
-		case -2:	return "_d4"; 
+		case -2:	return "_d4";
 		case -3:	return "_d8";
 		default:
 			return "";
@@ -630,7 +630,7 @@
 		src[i].rel.type = PARAMETER_VOID;
 		src[i].rel.swizzle = 0x00;
 		src[i].rel.scale = 1;
-		
+
 		switch(opcode)
 		{
 		case OPCODE_DEF:
@@ -1018,7 +1018,7 @@
 		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
 		{
 			buffer << index;
-			
+
 			return typeString(shaderType, version) + buffer.str();
 		}
 		else
@@ -1079,7 +1079,7 @@
 	{
 		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
 	}
-	
+
 	bool Shader::Instruction::isCall() const
 	{
 		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
@@ -1160,7 +1160,7 @@
 			token += 1 + tokenCount;
 		}
 	}
-	
+
 	int Shader::size(unsigned long opcode) const
 	{
 		return size(opcode, version);
@@ -1633,7 +1633,7 @@
 				{
 					containsLeave = true;
 				}
-				
+
 				if(instruction[i]->isBreak())
 				{
 					containsBreak = true;
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 26608f4..24a6693 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -594,7 +594,7 @@
 	{
 		if(shader)
 		{
-			for(int i = 0; i < 12; i++)
+			for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 			{
 				unsigned char usage = shader->output[i][0].usage;
 
@@ -1263,7 +1263,7 @@
 		loopRepDepth--;
 
 		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
-		
+
 		Nucleus::createBr(loopRepEndBlock[loopRepDepth]);
 		Nucleus::setInsertBlock(endBlock);
 
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 0eced49..1affb45 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -580,7 +580,7 @@
 	{
 		Vector4f v;
 
-		for(int i = 0; i < 12; i++)
+		for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 		{
 			if(state.output[i].write)
 			{
@@ -669,7 +669,7 @@
 
 	void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache)
 	{
-		for(int i = 0; i < 12; i++)
+		for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 		{
 			if(state.output[i].write)
 			{
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp
index 71aeb5b..d97d8ed 100644
--- a/src/Shader/VertexRoutine.hpp
+++ b/src/Shader/VertexRoutine.hpp
@@ -49,7 +49,7 @@
 		Int clipFlags;
 
 		RegisterArray<16> v;   // Varying registers
-		RegisterArray<12> o;   // Output registers
+		RegisterArray<MAX_VERTEX_OUTPUTS> o;   // Output registers
 
 		const VertexProcessor::State &state;
 
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp
index cfbda0c..c488630 100644
--- a/src/Shader/VertexShader.cpp
+++ b/src/Shader/VertexShader.cpp
@@ -25,7 +25,7 @@
 	{
 		version = 0x0300;
 		positionRegister = Pos;
-		pointSizeRegister = -1;   // No vertex point size
+		pointSizeRegister = Unused;
 		instanceIdDeclared = false;
 
 		for(int i = 0; i < MAX_INPUT_ATTRIBUTES; i++)
@@ -57,7 +57,7 @@
 		parse(token);
 
 		positionRegister = Pos;
-		pointSizeRegister = -1;   // No vertex point size
+		pointSizeRegister = Unused;
 		instanceIdDeclared = false;
 
 		for(int i = 0; i < MAX_INPUT_ATTRIBUTES; i++)
diff --git a/src/Shader/VertexShader.hpp b/src/Shader/VertexShader.hpp
index f8760fb..2a76cd2 100644
--- a/src/Shader/VertexShader.hpp
+++ b/src/Shader/VertexShader.hpp
@@ -16,6 +16,7 @@
 #define sw_VertexShader_hpp
 
 #include "Shader.hpp"
+#include "Main/Config.hpp"
 
 namespace sw
 {
@@ -40,8 +41,7 @@
 		enum {MAX_INPUT_ATTRIBUTES = 16};
 		Semantic input[MAX_INPUT_ATTRIBUTES];       // FIXME: Private
 
-		enum {MAX_OUTPUT_VARYINGS = 12};
-		Semantic output[MAX_OUTPUT_VARYINGS][4];   // FIXME: Private
+		Semantic output[MAX_VERTEX_OUTPUTS][4];   // FIXME: Private
 
 	private:
 		void analyzeInput();