Fix packusdw SSE2 fallback.

Bug swiftshader:20

Change-Id: I81ad267d450713ffe2a5a84e1d7f7f140b515c85
Reviewed-on: https://swiftshader-review.googlesource.com/8454
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp index e1c4c92..42578fe 100644 --- a/src/Reactor/LLVMReactor.cpp +++ b/src/Reactor/LLVMReactor.cpp
@@ -3017,11 +3017,11 @@ if(!saturate || !CPUID::supportsSSE4_1()) { - *this = Short4(Int4(int4)); + *this = Short4(int4); } else { - *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4))))); + *this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4)))); } } @@ -3276,6 +3276,12 @@ } } + Short8::Short8(short c) + { + int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; + storeValue(Nucleus::createConstantVector(constantVector, getType())); + } + Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) { int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; @@ -3354,6 +3360,12 @@ return T(VectorType::get(Short::getType(), 8)); } + UShort8::UShort8(unsigned short c) + { + int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; + storeValue(Nucleus::createConstantVector(constantVector, getType())); + } + UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) { int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; @@ -5552,7 +5564,7 @@ RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y) { - return x86::packusdw(x, y); // FIXME: Fallback required + return x86::packusdw(As<Int4>(x), As<Int4>(y)); } Type *UInt4::getType() @@ -6888,7 +6900,7 @@ return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value))); } - RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y) + RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y) { if(CPUID::supportsSSE4_1()) { @@ -6898,8 +6910,10 @@ } else { - // FIXME: Not an exact replacement! 
- return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u)); + RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000); + RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000); + + return As<UShort8>(packssdw(bx, by) + Short8(0x8000u)); } }
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp index 3adebd6..859e98f 100644 --- a/src/Reactor/Reactor.hpp +++ b/src/Reactor/Reactor.hpp
@@ -870,6 +870,7 @@ { public: Short8() = default; + Short8(short c); Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7); Short8(RValue<Short8> rhs); // Short8(const Short8 &rhs); @@ -927,6 +928,7 @@ { public: UShort8() = default; + UShort8(unsigned short c); UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7); UShort8(RValue<UShort8> rhs); // UShort8(const UShort8 &rhs);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp index 77272f3d..0a78f6e 100644 --- a/src/Reactor/SubzeroReactor.cpp +++ b/src/Reactor/SubzeroReactor.cpp
@@ -3534,6 +3534,12 @@ return T(Type_v4i16); } + Short8::Short8(short c) + { + int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; + storeValue(Nucleus::createConstantVector(constantVector, getType())); + } + Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) { int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; @@ -3600,6 +3606,12 @@ return T(Ice::IceType_v8i16); } + UShort8::UShort8(unsigned short c) + { + int64_t constantVector[8] = {c, c, c, c, c, c, c, c}; + storeValue(Nucleus::createConstantVector(constantVector, getType())); + } + UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) { int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
diff --git a/src/Reactor/x86.hpp b/src/Reactor/x86.hpp index 5c08167..038a49d 100644 --- a/src/Reactor/x86.hpp +++ b/src/Reactor/x86.hpp
@@ -110,7 +110,7 @@ RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y); RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y); - RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y); + RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y); RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y); RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y);