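Refactor Byte4 unpacking.

Add a two-operand Unpack(RValue<Byte4>, RValue<Byte4>) overload to Reactor,
implemented for both the LLVM and Subzero back ends, and declare it in
Reactor.hpp. Call sites in FrameBuffer.cpp and SamplerCore.cpp that loaded
through Pointer<Byte8> and called UnpackLow now load through Pointer<Byte4>
and call Unpack instead.

Also includes whitespace-only changes in SubzeroReactor.cpp (the
MAP_ANONYMOUS fallback lines) and SamplerCore.cpp (a trailing-whitespace
line).
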
Change-Id: I82f8215ce4366e0795ce249b4d8f6c8e391af96c
Reviewed-on: https://swiftshader-review.googlesource.com/8568
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index b9b87d7..013f4b5 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -568,17 +568,17 @@
 		Short4 c1;
 		Short4 c2;
 
-		c1 = UnpackLow(As<Byte8>(c1), *Pointer<Byte8>(c));
+		c1 = Unpack(*Pointer<Byte4>(c));
 
 		switch(state.sourceFormat)
 		{
 		case FORMAT_X8R8G8B8:
 		case FORMAT_A8R8G8B8:
-			c2 = UnpackLow(As<Byte8>(c2), *Pointer<Byte8>(s));
+			c2 = Unpack(*Pointer<Byte4>(s));
 			break;
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8B8G8R8:
-			c2 = Swizzle(UnpackLow(As<Byte8>(c2), *Pointer<Byte8>(s)), 0xC6);
+			c2 = Swizzle(Unpack(*Pointer<Byte4>(s)), 0xC6);
 			break;
 		case FORMAT_A16B16G16R16:
 			c2 = Swizzle(*Pointer<Short4>(s), 0xC6);
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index e6b4b94..d3da778 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2141,6 +2141,14 @@
 		return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
 	}
 
+	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
+	{
+		Value *xx = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
+		Value *yy = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), y.value, 0);
+
+		return UnpackLow(As<Byte8>(xx), As<Byte8>(yy));
+	}
+
 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
 	{
 		if(CPUID::supportsMMX2())
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 74a3f6c..cfb005c 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -564,6 +564,7 @@
 	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y);
 	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y);
 	RValue<Short4> Unpack(RValue<Byte4> x);
+	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y);
 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y);
 	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y);
 	RValue<Int> SignMask(RValue<Byte8> x);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 9cd6fb8..5468cb9 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -40,8 +40,8 @@
 #include <Windows.h>
 #else
 #include <sys/mman.h>
-#if !defined(MAP_ANONYMOUS)

-#define MAP_ANONYMOUS MAP_ANON

+#if !defined(MAP_ANONYMOUS)
+#define MAP_ANONYMOUS MAP_ANON
 #endif
 #endif
 
@@ -2572,6 +2572,11 @@
 		return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
 	}
 
+	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
+	{
+		return UnpackLow(As<Byte8>(x), As<Byte8>(y));
+	}
+
 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
 	{
 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 67b0f05..18feac3 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1690,7 +1690,7 @@
 			uuuu = applyOffset(uuuu, offset.x, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
 			vvvv = applyOffset(vvvv, offset.y, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
 		}
-		
+
 		Short4 uuu2 = uuuu;
 		uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
 		uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
@@ -1776,12 +1776,12 @@
 			{
 			case 4:
 				{
-					Byte8 c0 = *Pointer<Byte8>(buffer[f0] + 4 * index[0]);
-					Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]);
-					Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]);
-					Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]);
-					c.x = UnpackLow(c0, c1);
-					c.y = UnpackLow(c2, c3);
+					Byte4 c0 = *Pointer<Byte4>(buffer[f0] + 4 * index[0]);
+					Byte4 c1 = *Pointer<Byte4>(buffer[f1] + 4 * index[1]);
+					Byte4 c2 = *Pointer<Byte4>(buffer[f2] + 4 * index[2]);
+					Byte4 c3 = *Pointer<Byte4>(buffer[f3] + 4 * index[3]);
+					c.x = Unpack(c0, c1);
+					c.y = Unpack(c2, c3);
 
 					switch(state.textureFormat)
 					{
@@ -1819,12 +1819,12 @@
 				break;
 			case 3:
 				{
-					Byte8 c0 = *Pointer<Byte8>(buffer[f0] + 4 * index[0]);
-					Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]);
-					Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]);
-					Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]);
-					c.x = UnpackLow(c0, c1);
-					c.y = UnpackLow(c2, c3);
+					Byte4 c0 = *Pointer<Byte4>(buffer[f0] + 4 * index[0]);
+					Byte4 c1 = *Pointer<Byte4>(buffer[f1] + 4 * index[1]);
+					Byte4 c2 = *Pointer<Byte4>(buffer[f2] + 4 * index[2]);
+					Byte4 c3 = *Pointer<Byte4>(buffer[f3] + 4 * index[3]);
+					c.x = Unpack(c0, c1);
+					c.y = Unpack(c2, c3);
 
 					switch(state.textureFormat)
 					{