Transform feedback query implementation

This is a first implementation of the transform feedback
"primitives written" query (GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN).

It passes the currently available related dEQP tests (more tests
will become available once transform feedback is fully implemented).

Change-Id: Iaee97e3e2e853174b7f22836f72dabede1ed04c1
Reviewed-on: https://swiftshader-review.googlesource.com/4912
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/D3D9/Direct3DQuery9.cpp b/src/D3D9/Direct3DQuery9.cpp
index c0f4323..703d9e0 100644
--- a/src/D3D9/Direct3DQuery9.cpp
+++ b/src/D3D9/Direct3DQuery9.cpp
@@ -24,7 +24,7 @@
 	{
 		if(type == D3DQUERYTYPE_OCCLUSION)
 		{
-			query = new sw::Query();
+			query = new sw::Query(sw::Query::FRAGMENTS_PASSED);
 		}
 		else
 		{
diff --git a/src/OpenGL/libGL/Query.cpp b/src/OpenGL/libGL/Query.cpp
index 9ef0790..a6ce26f 100644
--- a/src/OpenGL/libGL/Query.cpp
+++ b/src/OpenGL/libGL/Query.cpp
@@ -39,8 +39,19 @@
 {

     if(mQuery == NULL)

     {

-		mQuery = new sw::Query();

-        

+		sw::Query::Type type = sw::Query::FRAGMENTS_PASSED;   // Defined fallback: 'default:' below assigns nothing, so 'type' must never be read while indeterminate.

+		switch(mType)   // Select the sw::Query type that corresponds to this GL query's mType.

+		{

+		case GL_ANY_SAMPLES_PASSED:

+		case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:

+			type = sw::Query::FRAGMENTS_PASSED;

+			break;

+		default:

+			ASSERT(false);   // Unexpected mType. NOTE(review): if ASSERT is a no-op in some builds, execution continues with the fallback above - confirm.

+		}

+

+		mQuery = new sw::Query(type);   // 'type' is always initialized at this point.

+

 		if(!mQuery)

         {

             return error(GL_OUT_OF_MEMORY);

diff --git a/src/OpenGL/libGLESv2/Query.cpp b/src/OpenGL/libGLESv2/Query.cpp
index 6439e27..b425cb6 100644
--- a/src/OpenGL/libGLESv2/Query.cpp
+++ b/src/OpenGL/libGLESv2/Query.cpp
@@ -39,8 +39,22 @@
 {

     if(mQuery == NULL)

     {

-		mQuery = new sw::Query();

-        

+		sw::Query::Type type = sw::Query::FRAGMENTS_PASSED;   // Defined fallback: 'default:' below assigns nothing, so 'type' must never be read while indeterminate.

+		switch(mType)   // Select the sw::Query type that corresponds to this GL query's mType.

+		{

+		case GL_ANY_SAMPLES_PASSED_EXT:

+		case GL_ANY_SAMPLES_PASSED_CONSERVATIVE_EXT:

+			type = sw::Query::FRAGMENTS_PASSED;

+			break;

+		case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:

+			type = sw::Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;

+			break;

+		default:

+			ASSERT(false);   // Unexpected mType. NOTE(review): if ASSERT is a no-op in some builds, execution continues with the fallback above - confirm.

+		}

+

+		mQuery = new sw::Query(type);   // 'type' is always initialized at this point.

+

 		if(!mQuery)

         {

             return error(GL_OUT_OF_MEMORY);

@@ -51,7 +65,18 @@
 

 	mQuery->begin();

 	device->addQuery(mQuery);

-	device->setOcclusionEnabled(true);

+	switch(mType)

+	{

+	case GL_ANY_SAMPLES_PASSED_EXT:

+	case GL_ANY_SAMPLES_PASSED_CONSERVATIVE_EXT:

+		device->setOcclusionEnabled(true);

+		break;

+	case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:

+		device->setTransformFeedbackQueryEnabled(true);

+		break;

+	default:

+		ASSERT(false);

+	}

 }

 

 void Query::end()

@@ -65,8 +90,19 @@
 

     mQuery->end();

 	device->removeQuery(mQuery);

-	device->setOcclusionEnabled(false);

-    

+	switch(mType)

+	{

+	case GL_ANY_SAMPLES_PASSED_EXT:

+	case GL_ANY_SAMPLES_PASSED_CONSERVATIVE_EXT:

+		device->setOcclusionEnabled(false);

+		break;

+	case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:

+		device->setTransformFeedbackQueryEnabled(false);

+		break;

+	default:

+		ASSERT(false);

+	}

+

     mStatus = GL_FALSE;

     mResult = GL_FALSE;

 }

@@ -105,17 +141,17 @@
     {

         if(!mQuery->building && mQuery->reference == 0)

         {

-			unsigned int numPixels = mQuery->data;

+			unsigned int resultSum = mQuery->data;

             mStatus = GL_TRUE;

 

             switch(mType)

             {

             case GL_ANY_SAMPLES_PASSED_EXT:

             case GL_ANY_SAMPLES_PASSED_CONSERVATIVE_EXT:

-                mResult = (numPixels > 0) ? GL_TRUE : GL_FALSE;

+				mResult = (resultSum > 0) ? GL_TRUE : GL_FALSE;

                 break;

             case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:

-                UNIMPLEMENTED();

+				mResult = resultSum;

                 break;

             default:

                 ASSERT(false);

diff --git a/src/Renderer/Context.cpp b/src/Renderer/Context.cpp
index bde6c94..c4b044b 100644
--- a/src/Renderer/Context.cpp
+++ b/src/Renderer/Context.cpp
@@ -297,6 +297,8 @@
 		instanceID = 0;
 
 		occlusionEnabled = false;
+		transformFeedbackQueryEnabled = false;
+		transformFeedbackEnabled = false;
 
 		pointSpriteEnable = false;
 		pointScaleEnable = false;
diff --git a/src/Renderer/Context.hpp b/src/Renderer/Context.hpp
index 169df8a..5e215cb 100644
--- a/src/Renderer/Context.hpp
+++ b/src/Renderer/Context.hpp
@@ -492,6 +492,8 @@
 		bool colorVertexEnable;

 

 		bool occlusionEnabled;

+		bool transformFeedbackQueryEnabled;

+		bool transformFeedbackEnabled;

 

 		// Pixel processor states

 		bool rasterizerDiscard;

diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index cc95546..5535967 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -303,12 +303,17 @@
 
 			if(queries.size() != 0)
 			{
+				draw->queries = new std::list<Query*>();
+				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
 				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
 				{
-					atomicIncrement(&(*query)->reference);
+					Query* q = *query;
+					if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
+					{
+						atomicIncrement(&(q->reference));
+						draw->queries->push_back(q);
+					}
 				}
-
-				draw->queries = new std::list<Query*>(queries);
 			}
 
 			draw->drawType = drawType;
@@ -881,12 +886,6 @@
 
 		pixelProgress[cluster].processedPrimitives = primitive + count;
 
-		if(pixelProgress[cluster].processedPrimitives >= draw.count)
-		{
-			pixelProgress[cluster].drawCall++;
-			pixelProgress[cluster].processedPrimitives = 0;
-		}
-
 		int ref = atomicDecrement(&primitiveProgress[unit].references);
 
 		if(ref == 0)
@@ -911,9 +910,19 @@
 					{
 						Query *query = *q;
 
-						for(int cluster = 0; cluster < clusterCount; cluster++)
+						switch(query->type)
 						{
-							atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
+						case Query::FRAGMENTS_PASSED:
+							for(int cluster = 0; cluster < clusterCount; cluster++)
+							{
+								atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
+							}
+							break;
+						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+							atomicAdd((volatile int*)&query->data, pixelProgress[cluster].processedPrimitives);
+							break;
+						default:
+							break;
 						}
 
 						atomicDecrement(&query->reference);
@@ -972,6 +981,12 @@
 			}
 		}
 
+		if(pixelProgress[cluster].processedPrimitives >= draw.count)
+		{
+			pixelProgress[cluster].drawCall++;
+			pixelProgress[cluster].processedPrimitives = 0;
+		}
+
 		pixelProgress[cluster].executing = false;
 	}
 
diff --git a/src/Renderer/Renderer.hpp b/src/Renderer/Renderer.hpp
index 30fccc5..f338259 100644
--- a/src/Renderer/Renderer.hpp
+++ b/src/Renderer/Renderer.hpp
@@ -86,11 +86,10 @@
 
 	struct Query
 	{
-		Query()
+		enum Type { FRAGMENTS_PASSED, TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN };
+
+		Query(Type type) : building(false), reference(0), data(0), type(type)
 		{
-			building = false;
-			reference = 0;
-			data = 0;
 		}
 
 		void begin()
@@ -107,6 +106,8 @@
 		bool building;
 		volatile int reference;
 		volatile unsigned int data;
+
+		const Type type;
 	};
 
 	struct DrawData
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index b71fdb2..eaaf3e2 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -642,6 +642,16 @@
 		point.pointScaleC = pointScaleC;
 	}
 
+	void VertexProcessor::setTransformFeedbackQueryEnabled(bool enable)   // Stores the transform feedback query enable flag in the context.
+	{
+		context->transformFeedbackQueryEnabled = enable;   // Only the context field is written here; nothing else is updated by this call.
+	}
+
+	void VertexProcessor::enableTransformFeedback(uint64_t enable)   // Stores in the context whether 'enable' is non-zero.
+	{
+		context->transformFeedbackEnabled = (enable != 0);   // The context field is a bool; the 64-bit value collapses to true/false here.
+	}
+
 	const Matrix &VertexProcessor::getModelTransform(int i)
 	{
 		updateTransform();
@@ -873,6 +883,9 @@
 		state.superSampling = context->getSuperSampleCount() > 1;
 		state.multiSampling = context->getMultiSampleCount() > 1;
 
+		state.transformFeedbackQueryEnabled = context->transformFeedbackQueryEnabled;
+		state.transformFeedbackEnabled = context->transformFeedbackEnabled;
+
 		for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
 		{
 			state.input[i].type = context->input[i].type;
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index a8ddf1a..c353990 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -69,6 +69,8 @@
 			bool localViewerActive                            : 1;

 			bool pointSizeActive                              : 1;

 			bool pointScaleActive                             : 1;

+			bool transformFeedbackQueryEnabled                : 1;

+			uint64_t transformFeedbackEnabled                 : 64;

 

 			bool preTransformed : 1;

 			bool superSampling  : 1;

@@ -259,6 +261,9 @@
 		virtual void setPointScaleB(float pointScaleB);

 		virtual void setPointScaleC(float pointScaleC);

 

+		virtual void setTransformFeedbackQueryEnabled(bool enable);

+		virtual void enableTransformFeedback(uint64_t enable);

+

 	protected:

 		const Matrix &getModelTransform(int i);

 		const Matrix &getViewTransform();