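Derive VertexRoutine from Function<>.

VertexRoutine now inherits from
Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>)>
instead of constructing a local Function object inside generate().
generate() reads its arguments through the inherited Arg<>() accessors,
the Registers instance becomes a member (r) rather than a local, and
getRoutine() together with the stored 'routine' pointer is removed.
VertexProcessor now obtains the routine by invoking the generator's
operator() directly after generate(). Trailing whitespace is also
stripped from the touched files.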

Bug 22652760

Change-Id: I48e9e1f3ff677429eff1aea2f80b1e384a537a14
Reviewed-on: https://swiftshader-review.googlesource.com/4557
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index 30e8cd5..5517820 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -73,7 +73,7 @@
 		P = 0;
 		PB = 0;
 		PBV = 0;
-		
+
 		for(int i = 0; i < 12; i++)
 		{
 			PBVM[i] = 0;
@@ -271,7 +271,7 @@
 	{
 		if(light < 8)
 		{
-			ff.attenuationConstant[light] = replicate(constant);			
+			ff.attenuationConstant[light] = replicate(constant);
 			ff.attenuationLinear[light] = replicate(linear);
 			ff.attenuationQuadratic[light] = replicate(quadratic);
 		}
@@ -707,7 +707,7 @@
 		{
 			PB = P * B;
 			PBV = PB * V;
-			
+
 			for(int i = 0; i < activeMatrices; i++)
 			{
 				PBVM[i] = PBV * M[i];
@@ -723,7 +723,7 @@
 		{
 			PB = P * B;
 			PBV = PB * V;
-			
+
 			for(int i = 0; i < activeMatrices; i++)
 			{
 				PBVM[i] = PBV * M[i];
@@ -737,7 +737,7 @@
 		if(updateViewMatrix)
 		{
 			PBV = PB * V;
-			
+
 			for(int i = 0; i < activeMatrices; i++)
 			{
 				PBVM[i] = PBV * M[i];
@@ -808,7 +808,7 @@
 		state.shaderContainsTexldl = context->vertexShader ? context->vertexShader->containsTexldl() : false;
 		state.positionRegister = context->vertexShader ? context->vertexShader->positionRegister : Pos;
 		state.pointSizeRegister = context->vertexShader ? context->vertexShader->pointSizeRegister : Pts;
-		
+
 		state.vertexBlendMatrixCount = context->vertexBlendMatrixCountActive();
 		state.indexedVertexBlendEnable = context->indexedVertexBlendActive();
 		state.vertexNormalActive = context->vertexNormalActive();
@@ -888,7 +888,7 @@
 			{
 				state.output[D0].write = 0xF;
 			}
-			
+
 			if(context->specularActive())
 			{
 				state.output[D1].write = 0xF;
@@ -968,7 +968,7 @@
 			}
 
 			generator->generate();
-			routine = generator->getRoutine();
+			routine = (*generator)(L"VertexRoutine_%0.8X", state.shaderID);
 			delete generator;
 
 			routineCache->add(state, routine);
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index b875517..d41b2b7 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -23,9 +23,8 @@
 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
 	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
 
-	VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : state(state), shader(shader)
+	VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : r(shader), state(state), shader(shader)
 	{
-		routine = 0;
 	}
 
 	VertexRoutine::~VertexRoutine()
@@ -34,67 +33,56 @@
 
 	void VertexRoutine::generate()
 	{
-		Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>)> function;
+		Pointer<Byte> vertex(Arg<0>());
+		Pointer<Byte> batch(Arg<1>());
+		Pointer<Byte> task(Arg<2>());
+		Pointer<Byte> data(Arg<3>());
+
+		const bool texldl = state.shaderContainsTexldl;
+
+		Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
+		Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
+		Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
+
+		UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
+
+		r.data = data;
+		r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
+		if(shader && shader->instanceIdDeclared)
 		{
-			Pointer<Byte> vertex(function.Arg<0>());
-			Pointer<Byte> batch(function.Arg<1>());
-			Pointer<Byte> task(function.Arg<2>());
-			Pointer<Byte> data(function.Arg<3>());
-
-			const bool texldl = state.shaderContainsTexldl;
-
-			Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
-			Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
-			Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
-
-			UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
-
-			Registers r(shader);
-			r.data = data;
-			r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
-			if(shader && shader->instanceIdDeclared)
-			{
-				r.instanceID = *Pointer<Int>(data + OFFSET(DrawData, instanceID));
-			}
-
-			Do
-			{
-				UInt index = *Pointer<UInt>(batch);
-				UInt tagIndex = index & 0x0000003C;
-				UInt indexQ = !texldl ? UInt(index & 0xFFFFFFFC) : index;   // FIXME: TEXLDL hack to have independent LODs, hurts performance.
-
-				If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
-				{
-					*Pointer<UInt>(tagCache + tagIndex) = indexQ;
-
-					readInput(r, indexQ);
-					pipeline(r);
-					postTransform(r);
-					computeClipFlags(r);
-
-					Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex));
-					writeCache(cacheLine0, r);
-				}
-
-				UInt cacheIndex = index & 0x0000003F;
-				Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
-				writeVertex(vertex, cacheLine);
-
-				vertex += sizeof(Vertex);
-				batch += sizeof(unsigned int);
-				vertexCount--;
-			}
-			Until(vertexCount == 0)
-
-			Return();
+			r.instanceID = *Pointer<Int>(data + OFFSET(DrawData, instanceID));
 		}
 
-		routine = function(L"VertexRoutine_%0.8X", state.shaderID);
-	}
+		Do
+		{
+			UInt index = *Pointer<UInt>(batch);
+			UInt tagIndex = index & 0x0000003C;
+			UInt indexQ = !texldl ? UInt(index & 0xFFFFFFFC) : index;   // FIXME: TEXLDL hack to have independent LODs, hurts performance.
 
-	Routine *VertexRoutine::getRoutine()
-	{
-		return routine;
+			If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
+			{
+				*Pointer<UInt>(tagCache + tagIndex) = indexQ;
+
+				readInput(r, indexQ);
+				pipeline(r);
+				postTransform(r);
+				computeClipFlags(r);
+
+				Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex));
+				writeCache(cacheLine0, r);
+			}
+
+			UInt cacheIndex = index & 0x0000003F;
+			Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
+			writeVertex(vertex, cacheLine);
+
+			vertex += sizeof(Vertex);
+			batch += sizeof(unsigned int);
+			vertexCount--;
+		}
+		Until(vertexCount == 0)
+
+		Return();
 	}
 
 	void VertexRoutine::readInput(Registers &r, UInt &index)
@@ -242,7 +230,7 @@
 				v.y = Float4(*Pointer<Short4>(source1));
 				v.z = Float4(*Pointer<Short4>(source2));
 				v.w = Float4(*Pointer<Short4>(source3));
-			
+
 				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
 
 				if(stream.normalized)
@@ -251,7 +239,7 @@
 					if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
 					if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
-				}			
+				}
 			}
 			break;
 		case STREAMTYPE_USHORT:
@@ -260,7 +248,7 @@
 				v.y = Float4(*Pointer<UShort4>(source1));
 				v.z = Float4(*Pointer<UShort4>(source2));
 				v.w = Float4(*Pointer<UShort4>(source3));
-			
+
 				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
 
 				if(stream.normalized)
@@ -277,7 +265,7 @@
 				// FIXME: Vectorize
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source0);
 
 					v.x.x = Float(x & 0x000003FF);
@@ -287,7 +275,7 @@
 
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source1);
 
 					v.y.x = Float(x & 0x000003FF);
@@ -297,7 +285,7 @@
 
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source2);
 
 					v.z.x = Float(x & 0x000003FF);
@@ -307,7 +295,7 @@
 
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source3);
 
 					v.w.x = Float(x & 0x000003FF);
@@ -326,7 +314,7 @@
 				// FIXME: Vectorize
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source0);
 
 					v.x.x = Float((x << 22) & 0xFFC00000);
@@ -336,7 +324,7 @@
 
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source1);
 
 					v.y.x = Float((x << 22) & 0xFFC00000);
@@ -346,7 +334,7 @@
 
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source2);
 
 					v.z.x = Float((x << 22) & 0xFFC00000);
@@ -356,7 +344,7 @@
 
 				{
 					Int x, y, z;
-					
+
 					x = y = z = *Pointer<Int>(source3);
 
 					v.w.x = Float((x << 22) & 0xFFC00000);
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp
index 26663d7..55b1018 100644
--- a/src/Shader/VertexRoutine.hpp
+++ b/src/Shader/VertexRoutine.hpp
@@ -19,7 +19,7 @@
 
 namespace sw
 {
-	class VertexRoutine
+	class VertexRoutine : public Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>)>
 	{
 	protected:
 		struct Registers
@@ -31,7 +31,7 @@
 			{
 				loopDepth = -1;
 				enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-				
+
 				if(shader && shader->containsBreakInstruction())
 				{
 					enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
@@ -71,31 +71,30 @@
 			Int instanceID;
 		};
 
+		Registers r;
+
 	public:
 		VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader);
 
 		virtual ~VertexRoutine();
 
 		void generate();
-		Routine *getRoutine();
 
 	protected:
 		const VertexProcessor::State &state;
 		const VertexShader *const shader;
 
-	private:		
+	private:
 		virtual void pipeline(Registers &r) = 0;
 
 		typedef VertexProcessor::State::Input Stream;
-		
+
 		Vector4f readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index);
 		void readInput(Registers &r, UInt &index);
 		void computeClipFlags(Registers &r);
 		void postTransform(Registers &r);
 		void writeCache(Pointer<Byte> &cacheLine, Registers &r);
 		void writeVertex(Pointer<Byte> &vertex, Pointer<Byte> &cacheLine);
-
-		Routine *routine;
 	};
 }