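Vectorize R5G6B5 framebuffer present.

Convert four R5G6B5 source pixels per loop iteration using Int4/Short4
vector operations instead of one pixel at a time, in both R5G6B5
conversion cases touched in FrameBuffer.cpp. Also fix a comment typo in
LLVMReactor.cpp and operator spacing in Program.cpp.

NOTE(review): the vectorized loops run while x < width - 3 and are
followed directly by break, with no scalar tail loop visible in these
hunks; when the number of remaining pixels is not a multiple of 4, the
last 1-3 pixels of a row appear to be left unconverted - confirm.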

Change-Id: I40922c89056cacd00f9d728dfe1f3f12824c81a1
Reviewed-on: https://swiftshader-review.googlesource.com/3253
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index 0b8c010..b9b87d7 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -263,17 +263,16 @@
 							}
 							break;
 						case FORMAT_R5G6B5:
-							For(, x < width, x++)
+							For(, x < width - 3, x += 4)
 							{
-								Int rgb = Int(*Pointer<Short>(s));
+								Int4 rgb = Int4(*Pointer<Short4>(s));
 
-								*Pointer<Int>(d) = 0xFF000000 |
-								                   ((rgb & 0xF800) << 8) | ((rgb & 0xE01F) << 3) |
-								                   ((rgb & 0x07E0) << 5) | ((rgb & 0x0600) >> 1) |
-								                   ((rgb & 0x001C) >> 2);
+								*Pointer<Int4>(d) = (((rgb & Int4(0xF800)) << 8) | ((rgb & Int4(0xE01F)) << 3)) |
+								                    (((rgb & Int4(0x07E0)) << 5) | ((rgb & Int4(0x0600)) >> 1)) |
+								                    (((rgb & Int4(0x001C)) >> 2) | Int4(0xFF000000));
 
-								s += sBytes;
-								d += dBytes;
+								s += 4 * sBytes;
+								d += 4 * dBytes;
 							}
 							break;
 						default:
@@ -372,17 +371,17 @@
 							}
 							break;
 						case FORMAT_R5G6B5:
-							For(, x < width, x++)
+							For(, x < width - 3, x += 4)
 							{
-								Int rgb = Int(*Pointer<Short>(s));
+								Int4 rgb = Int4(*Pointer<Short4>(s));
 
-								*Pointer<Int>(d) = 0xFF000000 |
-								                   ((rgb & 0x001F) << 19) | ((rgb & 0x001C) << 14) |
-								                   ((rgb & 0x07E0) << 5) | ((rgb & 0x0600) >> 1) |
-								                   ((rgb & 0xF800) >> 8) | ((rgb & 0xE000) >> 13);
+								*Pointer<Int4>(d) = Int4(0xFF000000) |
+                                                    (((rgb & Int4(0x001F)) << 19) | ((rgb & Int4(0x001C)) << 14)) |
+								                    (((rgb & Int4(0x07E0)) << 5) | ((rgb & Int4(0x0600)) >> 1)) |
+								                    (((rgb & Int4(0xF800)) >> 8) | ((rgb & Int4(0xE000)) >> 13));
 
-								s += sBytes;
-								d += dBytes;
+								s += 4 * sBytes;
+								d += 4 * dBytes;
 							}
 							break;
 						default:
diff --git a/src/OpenGL/libGLESv2/Program.cpp b/src/OpenGL/libGLESv2/Program.cpp
index 67006fe..4fcadea 100644
--- a/src/OpenGL/libGLESv2/Program.cpp
+++ b/src/OpenGL/libGLESv2/Program.cpp
@@ -496,7 +496,7 @@
 		if(targetUniform->type == floatType[index])
 		{
 			memcpy(targetUniform->data + uniformIndex[location].element * sizeof(GLfloat)* numElements,
-				   v, numElements * sizeof(GLfloat)* count);
+				   v, numElements * sizeof(GLfloat) * count);
 		}
 		else if(targetUniform->type == boolType[index])
 		{
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 42578fe..480ac6b 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -4887,7 +4887,7 @@
 
 			// Each Short is packed into each Int in the (Short | Short) format.
 			// Shifting by 16 will retrieve the original Short value.
-			// Shitfing an Int will propagate the sign bit, which will work
+			// Shifting an Int will propagate the sign bit, which will work
 			// for both positive and negative values of a Short.
 			*this >>= 16;
 		}