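Fix transpose2x4().

The previous implementation assigned to row0 and row1 before it had
finished reading them, so the UnpackHigh() that produces row2 operated
on already-overwritten inputs. Compute the UnpackLow() and UnpackHigh()
results into temporaries first, then write the rows from those.

Also remove transpose2x4h(), and take the transpose2x4() path in
VertexRoutine when state.output[i].write == 0x03 rather than 0x02.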

Change-Id: I079991d257be4aa00a0aef938ccf0110cd005bcd
Reviewed-on: https://swiftshader-review.googlesource.com/8288
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index be1ca2e..338ea08 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -475,18 +475,13 @@
 
 	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
 	{
-		row0 = UnpackLow(row0, row1);
-		row1 = Float4(row0.zw, row1.zw);
-		row2 = UnpackHigh(row0, row1);
-		row3 = Float4(row2.zw, row3.zw);
-	}
+		Float4 tmp01 = UnpackLow(row0, row1);
+		Float4 tmp23 = UnpackHigh(row0, row1);
 
-	void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
-	{
-		row0 = UnpackLow(row2, row3);
-		row1 = Float4(row0.zw, row1.zw);
-		row2 = UnpackHigh(row2, row3);
-		row3 = Float4(row2.zw, row3.zw);
+		row0 = tmp01;
+		row1 = Float4(tmp01.zw, row1.zw);
+		row2 = tmp23;
+		row3 = Float4(tmp23.zw, row3.zw);
 	}
 
 	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp
index edf442c..c7b8be4 100644
--- a/src/Shader/ShaderCore.hpp
+++ b/src/Shader/ShaderCore.hpp
@@ -87,7 +87,6 @@
 	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
 	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
 	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
-	void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
 	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
 
 	class Register
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 0d22162..42faa80 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -687,7 +687,7 @@
 				}
 				else
 				{
-					if(state.output[i].write == 0x02)
+					if(state.output[i].write == 0x03)
 					{
 						transpose2x4(v.x, v.y, v.z, v.w);
 					}