Refactor Byte4 unpacking.
Change-Id: I82f8215ce4366e0795ce249b4d8f6c8e391af96c
Reviewed-on: https://swiftshader-review.googlesource.com/8568
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index b9b87d7..013f4b5 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -568,17 +568,17 @@
Short4 c1;
Short4 c2;
- c1 = UnpackLow(As<Byte8>(c1), *Pointer<Byte8>(c));
+ c1 = Unpack(*Pointer<Byte4>(c));
switch(state.sourceFormat)
{
case FORMAT_X8R8G8B8:
case FORMAT_A8R8G8B8:
- c2 = UnpackLow(As<Byte8>(c2), *Pointer<Byte8>(s));
+ c2 = Unpack(*Pointer<Byte4>(s));
break;
case FORMAT_X8B8G8R8:
case FORMAT_A8B8G8R8:
- c2 = Swizzle(UnpackLow(As<Byte8>(c2), *Pointer<Byte8>(s)), 0xC6);
+ c2 = Swizzle(Unpack(*Pointer<Byte4>(s)), 0xC6);
break;
case FORMAT_A16B16G16R16:
c2 = Swizzle(*Pointer<Short4>(s), 0xC6);
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index e6b4b94..d3da778 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2141,6 +2141,14 @@
return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
}
+ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
+ {
+ Value *xx = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
+ Value *yy = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), y.value, 0);
+
+ return UnpackLow(As<Byte8>(xx), As<Byte8>(yy));
+ }
+
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
if(CPUID::supportsMMX2())
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 74a3f6c..cfb005c 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -564,6 +564,7 @@
RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y);
RValue<Short4> Unpack(RValue<Byte4> x);
+ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y);
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y);
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y);
RValue<Int> SignMask(RValue<Byte8> x);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 9cd6fb8..5468cb9 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -40,8 +40,8 @@
#include <Windows.h>
#else
#include <sys/mman.h>
-#if !defined(MAP_ANONYMOUS)
-#define MAP_ANONYMOUS MAP_ANON
+#if !defined(MAP_ANONYMOUS)
+#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
@@ -2572,6 +2572,11 @@
return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
+ RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
+ {
+ return UnpackLow(As<Byte8>(x), As<Byte8>(y));
+ }
+
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 67b0f05..18feac3 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1690,7 +1690,7 @@
uuuu = applyOffset(uuuu, offset.x, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
vvvv = applyOffset(vvvv, offset.y, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
}
-
+
Short4 uuu2 = uuuu;
uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
@@ -1776,12 +1776,12 @@
{
case 4:
{
- Byte8 c0 = *Pointer<Byte8>(buffer[f0] + 4 * index[0]);
- Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]);
- Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]);
- Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]);
- c.x = UnpackLow(c0, c1);
- c.y = UnpackLow(c2, c3);
+ Byte4 c0 = *Pointer<Byte4>(buffer[f0] + 4 * index[0]);
+ Byte4 c1 = *Pointer<Byte4>(buffer[f1] + 4 * index[1]);
+ Byte4 c2 = *Pointer<Byte4>(buffer[f2] + 4 * index[2]);
+ Byte4 c3 = *Pointer<Byte4>(buffer[f3] + 4 * index[3]);
+ c.x = Unpack(c0, c1);
+ c.y = Unpack(c2, c3);
switch(state.textureFormat)
{
@@ -1819,12 +1819,12 @@
break;
case 3:
{
- Byte8 c0 = *Pointer<Byte8>(buffer[f0] + 4 * index[0]);
- Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]);
- Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]);
- Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]);
- c.x = UnpackLow(c0, c1);
- c.y = UnpackLow(c2, c3);
+ Byte4 c0 = *Pointer<Byte4>(buffer[f0] + 4 * index[0]);
+ Byte4 c1 = *Pointer<Byte4>(buffer[f1] + 4 * index[1]);
+ Byte4 c2 = *Pointer<Byte4>(buffer[f2] + 4 * index[2]);
+ Byte4 c3 = *Pointer<Byte4>(buffer[f3] + 4 * index[3]);
+ c.x = Unpack(c0, c1);
+ c.y = Unpack(c2, c3);
switch(state.textureFormat)
{