Fix for the remaining vertex array issues

Fixes all failures in dEQP.functional.vertex_arrays.*

In OpenGL ES 3.0, vertex attribute streams can be of a
different type from the vertex attribute in the shader.
For this reason, some conversion may be required. This
CL solves this issue by:
1) Sending the information about the vertex attribute's
   type in the shader to the vertex routine.
2) Handling this information by adding conversion where
   appropriate.

Change-Id: I04a5a34aea12684209e584aa5f15a3edfd57f956
Reviewed-on: https://swiftshader-review.googlesource.com/7254
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index 29da8e4..4cc379a 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -2890,12 +2890,25 @@
 				index = allocate(attributes, attribute);
 				const TType &type = attribute->getType();
 				int registerCount = attribute->totalRegisterCount();
+				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
+				switch(type.getBasicType())
+				{
+				case EbtInt:
+					attribType = sw::VertexShader::ATTRIBTYPE_INT;
+					break;
+				case EbtUInt:
+					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
+					break;
+				case EbtFloat:
+				default:
+					break;
+				}
 
 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
 				{
 					for(int i = 0; i < registerCount; i++)
 					{
-						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i));
+						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
 					}
 				}
 
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index ced0870..a64c010 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -958,6 +958,7 @@
 			state.input[i].type = context->input[i].type;
 			state.input[i].count = context->input[i].count;
 			state.input[i].normalized = context->input[i].normalized;
+			state.input[i].attribType = context->vertexShader ? context->vertexShader->getAttribType(i) : VertexShader::ATTRIBTYPE_FLOAT;
 		}
 
 		if(!context->vertexShader)
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index f0cf124..278c9b1 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -18,6 +18,7 @@
 #include "Matrix.hpp"
 #include "Context.hpp"
 #include "RoutineCache.hpp"
+#include "Shader/VertexShader.hpp"
 
 namespace sw
 {
@@ -102,6 +103,7 @@
 				StreamType type    : BITS(STREAMTYPE_LAST);
 				unsigned int count : 3;
 				bool normalized    : 1;
+				unsigned int attribType : BITS(VertexShader::ATTRIBTYPE_LAST);
 			};
 
 			struct Output
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index c67a8bc..087e481 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -148,6 +148,8 @@
 		Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0);
 		Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0);
 
+		bool isNativeFloatAttrib = (stream.attribType == VertexShader::ATTRIBTYPE_FLOAT) || stream.normalized;
+
 		switch(stream.type)
 		{
 		case STREAMTYPE_FLOAT:
@@ -156,25 +158,47 @@
 				{
 					// Null stream, all default components
 				}
-				else if(stream.count == 1)
-				{
-					v.x.x = *Pointer<Float>(source0);
-					v.x.y = *Pointer<Float>(source1);
-					v.x.z = *Pointer<Float>(source2);
-					v.x.w = *Pointer<Float>(source3);
-				}
 				else
 				{
-					v.x = *Pointer<Float4>(source0);
-					v.y = *Pointer<Float4>(source1);
-					v.z = *Pointer<Float4>(source2);
-					v.w = *Pointer<Float4>(source3);
+					if(stream.count == 1)
+					{
+						v.x.x = *Pointer<Float>(source0);
+						v.x.y = *Pointer<Float>(source1);
+						v.x.z = *Pointer<Float>(source2);
+						v.x.w = *Pointer<Float>(source3);
+					}
+					else
+					{
+						v.x = *Pointer<Float4>(source0);
+						v.y = *Pointer<Float4>(source1);
+						v.z = *Pointer<Float4>(source2);
+						v.w = *Pointer<Float4>(source3);
 
-					transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+						transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+					}
+
+					switch(stream.attribType)   // Float stream feeding an integer shader attribute: convert each component in place
+					{
+					case VertexShader::ATTRIBTYPE_INT:
+						if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
+						if(stream.count >= 2) v.y = As<Float4>(Int4(v.y));
+						if(stream.count >= 3) v.z = As<Float4>(Int4(v.z));
+						if(stream.count >= 4) v.w = As<Float4>(Int4(v.w));
+						break;
+					case VertexShader::ATTRIBTYPE_UINT:
+						if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
+						if(stream.count >= 2) v.y = As<Float4>(UInt4(v.y));
+						if(stream.count >= 3) v.z = As<Float4>(UInt4(v.z));
+						if(stream.count >= 4) v.w = As<Float4>(UInt4(v.w));
+						break;
+					default:   // ATTRIBTYPE_FLOAT: stream and shader types already match
+						break;
+					}
 				}
 			}
 			break;
 		case STREAMTYPE_BYTE:
+			if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<Byte4>(source0));
 				v.y = Float4(*Pointer<Byte4>(source1));
@@ -191,8 +215,18 @@
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
 				}
 			}
+			else // Stream: UByte, Shader attrib: Int / UInt
+			{
+				v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
 			break;
 		case STREAMTYPE_SBYTE:
+			if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<SByte4>(source0));
 				v.y = Float4(*Pointer<SByte4>(source1));
@@ -209,6 +243,15 @@
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
 				}
 			}
+			else // Stream: SByte, Shader attrib: Int / UInt
+			{
+				v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
 			break;
 		case STREAMTYPE_COLOR:
 			{
@@ -226,6 +269,7 @@
 			}
 			break;
 		case STREAMTYPE_SHORT:
+			if(isNativeFloatAttrib) // Stream: Short, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<Short4>(source0));
 				v.y = Float4(*Pointer<Short4>(source1));
@@ -242,8 +286,18 @@
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
 				}
 			}
+			else // Stream: Short, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
 			break;
 		case STREAMTYPE_USHORT:
+			if(isNativeFloatAttrib) // Stream: UShort, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<UShort4>(source0));
 				v.y = Float4(*Pointer<UShort4>(source1));
@@ -260,59 +314,70 @@
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
 				}
 			}
+			else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
+				v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
+				v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
+				v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+			}
 			break;
 		case STREAMTYPE_INT:
+			if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
 			{
+				v.x = Float4(*Pointer<Int4>(source0));
+				v.y = Float4(*Pointer<Int4>(source1));
+				v.z = Float4(*Pointer<Int4>(source2));
+				v.w = Float4(*Pointer<Int4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
 				if(stream.normalized)
 				{
-					v.x = Float4(*Pointer<Int4>(source0));
-					v.y = Float4(*Pointer<Int4>(source1));
-					v.z = Float4(*Pointer<Int4>(source2));
-					v.w = Float4(*Pointer<Int4>(source3));
-
-					transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
 					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
 					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
 					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
 				}
-				else
-				{
-					v.x = As<Float4>(*Pointer<Int4>(source0));
-					v.y = As<Float4>(*Pointer<Int4>(source1));
-					v.z = As<Float4>(*Pointer<Int4>(source2));
-					v.w = As<Float4>(*Pointer<Int4>(source3));
+			}
+			else // Stream: Int, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = *Pointer<Float4>(source0);
+				v.y = *Pointer<Float4>(source1);
+				v.z = *Pointer<Float4>(source2);
+				v.w = *Pointer<Float4>(source3);
 
-					transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-				}
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
 			}
 			break;
 		case STREAMTYPE_UINT:
+			if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
 			{
+				v.x = Float4(*Pointer<UInt4>(source0));
+				v.y = Float4(*Pointer<UInt4>(source1));
+				v.z = Float4(*Pointer<UInt4>(source2));
+				v.w = Float4(*Pointer<UInt4>(source3));
+
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+
 				if(stream.normalized)
 				{
-					v.x = Float4(*Pointer<UInt4>(source0));
-					v.y = Float4(*Pointer<UInt4>(source1));
-					v.z = Float4(*Pointer<UInt4>(source2));
-					v.w = Float4(*Pointer<UInt4>(source3));
-
-					transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-
 					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
 					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
 					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
 					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
 				}
-				else
-				{
-					v.x = As<Float4>(*Pointer<UInt4>(source0));
-					v.y = As<Float4>(*Pointer<UInt4>(source1));
-					v.z = As<Float4>(*Pointer<UInt4>(source2));
-					v.w = As<Float4>(*Pointer<UInt4>(source3));
+			}
+			else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = *Pointer<Float4>(source0);
+				v.y = *Pointer<Float4>(source1);
+				v.z = *Pointer<Float4>(source2);
+				v.w = *Pointer<Float4>(source3);
 
-					transpose4xN(v.x, v.y, v.z, v.w, stream.count);
-				}
+				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
 			}
 			break;
 		case STREAMTYPE_UDEC3:
@@ -538,7 +603,7 @@
 		if(stream.count < 1) v.x = Float4(0.0f);
 		if(stream.count < 2) v.y = Float4(0.0f);
 		if(stream.count < 3) v.z = Float4(0.0f);
-		if(stream.count < 4) v.w = Float4(1.0f);
+		if(stream.count < 4) v.w = isNativeFloatAttrib ? Float4(1.0f) : As<Float4>(Int4(0));
 
 		return v;
 	}
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp
index dd62b4b..a98932b 100644
--- a/src/Shader/VertexShader.cpp
+++ b/src/Shader/VertexShader.cpp
@@ -32,6 +32,7 @@
 		for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
 		{
 			input[i] = Semantic();
+			attribType[i] = ATTRIBTYPE_FLOAT;
 		}
 
 		if(vs)   // Make a copy
@@ -43,6 +44,7 @@
 
 			memcpy(output, vs->output, sizeof(output));
 			memcpy(input, vs->input, sizeof(input));
+			memcpy(attribType, vs->attribType, sizeof(attribType));
 			positionRegister = vs->positionRegister;
 			pointSizeRegister = vs->pointSizeRegister;
 			instanceIdDeclared = vs->instanceIdDeclared;
@@ -65,6 +67,7 @@
 		for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
 		{
 			input[i] = Semantic();
+			attribType[i] = ATTRIBTYPE_FLOAT;
 		}
 
 		optimize();
@@ -151,9 +154,10 @@
 		return textureSampling;
 	}
 
-	void VertexShader::setInput(int inputIdx, const sw::Shader::Semantic& semantic)
+	void VertexShader::setInput(int inputIdx, const sw::Shader::Semantic& semantic, AttribType aType)
 	{
 		input[inputIdx] = semantic;
+		attribType[inputIdx] = aType;
 	}
 
 	void VertexShader::setOutput(int outputIdx, int nbComponents, const sw::Shader::Semantic& semantic)
@@ -181,6 +185,11 @@
 		return input[inputIdx];
 	}
 
+	VertexShader::AttribType VertexShader::getAttribType(int inputIdx) const
+	{
+		return attribType[inputIdx];
+	}
+
 	const sw::Shader::Semantic& VertexShader::getOutput(int outputIdx, int component) const
 	{
 		return output[outputIdx][component];
diff --git a/src/Shader/VertexShader.hpp b/src/Shader/VertexShader.hpp
index 4eb6786..0ca7b93 100644
--- a/src/Shader/VertexShader.hpp
+++ b/src/Shader/VertexShader.hpp
@@ -23,6 +23,15 @@
 	class VertexShader : public Shader
 	{
 	public:
+		enum AttribType : unsigned char   // Basic type of a vertex attribute as declared in the shader
+		{
+			ATTRIBTYPE_FLOAT,   // Default when setInput() is called without an explicit type
+			ATTRIBTYPE_INT,
+			ATTRIBTYPE_UINT,
+
+			ATTRIBTYPE_LAST = ATTRIBTYPE_UINT   // Highest value; used with BITS() to size the attribType bitfield in VertexProcessor.hpp
+		};
+
 		explicit VertexShader(const VertexShader *vs = 0);
 		explicit VertexShader(const unsigned long *token);
 
@@ -31,7 +40,7 @@
 		static int validate(const unsigned long *const token);   // Returns number of instructions if valid
 		bool containsTextureSampling() const;
 
-		void setInput(int inputIdx, const Semantic& semantic);
+		void setInput(int inputIdx, const Semantic& semantic, AttribType attribType = ATTRIBTYPE_FLOAT);
 		void setOutput(int outputIdx, int nbComponents, const Semantic& semantic);
 		void setPositionRegister(int posReg);
 		void setPointSizeRegister(int ptSizeReg);
@@ -39,6 +48,7 @@
 
 		const Semantic& getInput(int inputIdx) const;
 		const Semantic& getOutput(int outputIdx, int component) const;
+		AttribType getAttribType(int inputIndex) const;
 		int getPositionRegister() const { return positionRegister; }
 		int getPointSizeRegister() const { return pointSizeRegister; }
 		bool isInstanceIdDeclared() const { return instanceIdDeclared; }
@@ -52,6 +62,8 @@
 		Semantic input[MAX_VERTEX_INPUTS];
 		Semantic output[MAX_VERTEX_OUTPUTS][4];
 
+		AttribType attribType[MAX_VERTEX_INPUTS];
+
 		int positionRegister;
 		int pointSizeRegister;