Non-square matrix related fixes

- Implemented proper VariableRegisterCount and
  VariableRegisterSize (we were using the row count instead of
  the column count to get the number of registers).
- Matrix-to-matrix copies now clear the correct rows of the
  destination matrix when needed.
- Added a registerSize helper function to TType to clarify the
  number of components per register for matrices.
- Added the missing member initializations in the TType constructor.

Change-Id: Ic880815515c7d12ad12e44f1392aa6892caa953f
Reviewed-on: https://swiftshader-review.googlesource.com/3718
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index 4fb4126..428be87 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -964,11 +964,12 @@
 			if(visit == PostVisit)

 			{

 				TIntermTyped *arg0 = arg[0]->getAsTyped();

-				const int dim = result->getNominalSize();

+				const int outCols = result->getNominalSize();

+				const int outRows = result->getSecondarySize();

 

 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix

 				{

-					for(int i = 0; i < dim; i++)

+					for(int i = 0; i < outCols; i++)

 					{

 						Instruction *init = emit(sw::Shader::OPCODE_MOV, result, &zero);

 						init->dst.index += i;

@@ -980,9 +981,12 @@
 				}

 				else if(arg0->isMatrix())

 				{

-					for(int i = 0; i < dim; i++)

+					const int inCols = arg0->getNominalSize();

+					const int inRows = arg0->getSecondarySize();

+

+					for(int i = 0; i < outCols; i++)

 					{

-						if(dim > dim2(arg0))

+						if(i >= inCols || outRows > inRows)

 						{

 							// Initialize to identity matrix

 							Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));

@@ -990,11 +994,11 @@
 							mov->dst.index += i;

 						}

 

-						if(i < dim2(arg0))

+						if(i < inCols)

 						{

 							Instruction *mov = emitCast(result, arg0);

 							mov->dst.index += i;

-							mov->dst.mask = 0xF >> (4 - dim2(arg0));

+							mov->dst.mask = 0xF >> (4 - inRows);

 							argument(mov->src[0], arg0, i);

 						}

 					}

@@ -1018,9 +1022,9 @@
 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;

 

 							int end = row + size - element;

-							column = end >= dim ? column + 1 : column;

-							element = element + dim - row;

-							row = end >= dim ? 0 : end;

+							column = end >= outRows ? column + 1 : column;

+							element = element + outRows - row;

+							row = end >= outRows ? 0 : end;

 						}

 					}

 				}

@@ -1461,7 +1465,7 @@
 		}

 		else if(type.isMatrix())

 		{

-			return registers * type.getSecondarySize();

+			return registers * type.registerSize();

 		}

 		

 		UNREACHABLE(0);

@@ -1477,7 +1481,7 @@
 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);

 			}

 

-			return type.isMatrix() ? type.getSecondarySize() : type.getNominalSize();

+			return type.registerSize();

 		}

 

 		if(type.isArray() && registers >= type.elementRegisterCount())

@@ -2077,7 +2081,7 @@
 		if(var == -1)

 		{

 			var = allocate(varyings, varying);

-			int componentCount = varying->getNominalSize();

+			int componentCount = varying->registerSize();

 			int registerCount = varying->totalRegisterCount();

 

 			if(pixelShader)

diff --git a/src/OpenGL/compiler/SymbolTable.cpp b/src/OpenGL/compiler/SymbolTable.cpp
index ad3488e..7932bd5 100644
--- a/src/OpenGL/compiler/SymbolTable.cpp
+++ b/src/OpenGL/compiler/SymbolTable.cpp
@@ -26,8 +26,9 @@
 int TSymbolTableLevel::uniqueId = 0;
 
 TType::TType(const TPublicType &p) :
-    type(p.type), precision(p.precision), primarySize(p.primarySize), secondarySize(p.secondarySize), qualifier(p.qualifier), array(p.array), arraySize(p.arraySize),
-    maxArraySize(0), arrayInformationType(0), structure(0), deepestStructNesting(0), mangled(0)
+    type(p.type), precision(p.precision), qualifier(p.qualifier), invariant(false), layoutQualifier(TLayoutQualifier::create()),

+    primarySize(p.primarySize), secondarySize(p.secondarySize), array(p.array), arraySize(p.arraySize), maxArraySize(0),

+    arrayInformationType(0), interfaceBlock(0), structure(0), deepestStructNesting(0), mangled(0)
 {
     if (p.userDef)
     {
diff --git a/src/OpenGL/compiler/Types.h b/src/OpenGL/compiler/Types.h
index cb2722c..e58c996 100644
--- a/src/OpenGL/compiler/Types.h
+++ b/src/OpenGL/compiler/Types.h
@@ -345,6 +345,11 @@
 		}
 	}
 
+	int registerSize() const
+	{
+		return isMatrix() ? secondarySize : primarySize;
+	}
+
 	bool isMatrix() const { return secondarySize > 1; }
 	void setSecondarySize(int s1) { secondarySize = s1; }
 	int getSecondarySize() const { return secondarySize; }
diff --git a/src/OpenGL/compiler/intermediate.h b/src/OpenGL/compiler/intermediate.h
index 99d126d..8b72d30 100644
--- a/src/OpenGL/compiler/intermediate.h
+++ b/src/OpenGL/compiler/intermediate.h
@@ -321,6 +321,7 @@
 
 	int totalRegisterCount() const { return type.totalRegisterCount(); }
 	int elementRegisterCount() const { return type.elementRegisterCount(); }
+	int registerSize() const { return type.registerSize(); }
 	int getArraySize() const { return type.getArraySize(); }
 
 protected:
diff --git a/src/OpenGL/libGLESv2/Program.cpp b/src/OpenGL/libGLESv2/Program.cpp
index dd4c598..b8ba80b 100644
--- a/src/OpenGL/libGLESv2/Program.cpp
+++ b/src/OpenGL/libGLESv2/Program.cpp
@@ -65,7 +65,7 @@
 

 	int Uniform::registerCount() const

 	{

-		return size() * VariableRowCount(type);

+		return size() * VariableRegisterCount(type);

 	}

 

 	UniformBlock::UniformBlock(const std::string &name, unsigned int elementIndex, unsigned int dataSize) :

@@ -1128,8 +1128,8 @@
 				{

 					int in = input->reg;

 					int out = output->reg;

-					int components = VariableColumnCount(output->type);

-					int registers = VariableRowCount(output->type) * output->size();

+					int components = VariableRegisterSize(output->type);

+					int registers = VariableRegisterCount(output->type) * output->size();

 

 					ASSERT(in >= 0);

 

@@ -1270,7 +1270,7 @@
 

 				linkedAttribute[location] = *attribute;

 

-				int rows = VariableRowCount(attribute->type);

+				int rows = VariableRegisterCount(attribute->type);

 

 				if(rows + location > MAX_VERTEX_ATTRIBS)

 				{

@@ -1292,7 +1292,7 @@
 

 			if(location == -1)   // Not set by glBindAttribLocation

 			{

-				int rows = VariableRowCount(attribute->type);

+				int rows = VariableRegisterCount(attribute->type);

 				int availableIndex = AllocateFirstFreeBits(&usedLocations, rows, MAX_VERTEX_ATTRIBS);

 

 				if(availableIndex == -1 || availableIndex + rows > MAX_VERTEX_ATTRIBS)

@@ -1308,7 +1308,7 @@
 		for(int attributeIndex = 0; attributeIndex < MAX_VERTEX_ATTRIBS; )

 		{

 			int index = vertexShader->getSemanticIndex(linkedAttribute[attributeIndex].name);

-			int rows = std::max(VariableRowCount(linkedAttribute[attributeIndex].type), 1);

+			int rows = std::max(VariableRegisterCount(linkedAttribute[attributeIndex].type), 1);

 

 			for(int r = 0; r < rows; r++)

 			{

diff --git a/src/OpenGL/libGLESv2/utilities.cpp b/src/OpenGL/libGLESv2/utilities.cpp
index ea19daf..bdb8e46 100644
--- a/src/OpenGL/libGLESv2/utilities.cpp
+++ b/src/OpenGL/libGLESv2/utilities.cpp
@@ -278,6 +278,19 @@
 		return 0;

 	}

 

+	int VariableRegisterCount(GLenum type)

+	{

+		// Number of registers used is the number of columns for matrices or 1 for scalars and vectors

+		return (VariableRowCount(type) > 1) ? VariableColumnCount(type) : 1;

+	}

+

+	int VariableRegisterSize(GLenum type)

+	{

+		// Number of components per register is the number of rows for matrices or columns for scalars and vectors

+		int nbRows = VariableRowCount(type);

+		return (nbRows > 1) ? nbRows : VariableColumnCount(type);

+	}

+

 	int AllocateFirstFreeBits(unsigned int *bits, unsigned int allocationSize, unsigned int bitsSize)

 	{

 		ASSERT(allocationSize <= bitsSize);

diff --git a/src/OpenGL/libGLESv2/utilities.h b/src/OpenGL/libGLESv2/utilities.h
index baa699f..82f5fa9 100644
--- a/src/OpenGL/libGLESv2/utilities.h
+++ b/src/OpenGL/libGLESv2/utilities.h
@@ -33,6 +33,8 @@
 	bool IsSamplerUniform(GLenum type);

 	int VariableRowCount(GLenum type);

 	int VariableColumnCount(GLenum type);

+	int VariableRegisterCount(GLenum type);

+	int VariableRegisterSize(GLenum type);

 

 	int AllocateFirstFreeBits(unsigned int *bits, unsigned int allocationSize, unsigned int bitsSize);