Emulate SignMask intrinsics.
Bug b/37496809
Change-Id: I34d0d511bd50890c36d292ab431115f5d2e65a58
Reviewed-on: https://swiftshader-review.googlesource.com/9492
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 3ed9d50..172f064 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -2710,6 +2710,16 @@
return RValue<Byte8>(Nucleus::createNot(val.value));
}
+ RValue<Byte> Extract(RValue<Byte8> val, int i) // Returns lane i of val as a scalar Byte; i is a plain C++ int, not a Reactor value.
+ {
+ return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i)); // Thin wrapper over Nucleus::createExtractElement with Byte as the element type.
+ }
+
+ RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i) // Returns the Byte8 produced by inserting element at lane i of val; val itself is passed by value.
+ {
+ return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i)); // Thin wrapper over Nucleus::createInsertElement.
+ }
+
RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
{
Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
@@ -2760,16 +2770,64 @@
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
+ RValue<SByte> Extract(RValue<SByte8> val, int i) // Returns lane i of val as a scalar SByte; i is a plain C++ int, not a Reactor value.
+ {
+ return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i)); // Thin wrapper over Nucleus::createExtractElement with SByte as the element type.
+ }
+
+ RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i) // Returns the SByte8 produced by inserting element at lane i of val; val itself is passed by value.
+ {
+ return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i)); // Thin wrapper over Nucleus::createInsertElement.
+ }
+
+ RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs) // Shifts every signed byte lane of lhs right by the constant rhs and returns the shifted vector.
+ {
+ if(emulateIntrinsics) // Emulated path: shift lanes 0..7 one at a time as scalar SByte values.
+ {
+ SByte8 result; // Not initialized here; each of lanes 0..7 is written by the Insert calls below.
+ result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ #if defined(__i386__) || defined(__x86_64__)
+ // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
+ RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00); // Odd bytes: shift each 16-bit lane, then keep only its high byte.
+ RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8); // Even bytes: move the low byte to the top so its own sign bit drives the shift, then bring it back down with an unsigned shift.
+
+ return As<SByte8>(hi | lo); // The two halves occupy disjoint bytes, so OR recombines them.
+ #else
+ return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs)))); // Non-x86 targets: emit a vector arithmetic shift by the constant directly.
+ #endif
+ }
+ }
+
RValue<Int> SignMask(RValue<Byte8> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics) // Emulated path: gather the sign bit of byte lane i into bit i of the returned Int (i = 0..7) without the SignMask intrinsic.
+ {
+ Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); // The signed shift by 7 fills each lane with its sign bit; the mask then leaves only bit i in lane i.
+ return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7)); // OR the eight single-bit lanes into one scalar.
+ }
+ else // Otherwise emit a call to the Ice SignMask intrinsic with x as its only argument.
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result));
+ }
}
// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
@@ -2991,14 +3049,22 @@
RValue<Int> SignMask(RValue<SByte8> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics) // Emulated path: gather the sign bit of byte lane i into bit i of the returned Int (i = 0..7) without the SignMask intrinsic.
+ {
+ Byte8 xx = As<Byte8>(x >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); // Masked as unsigned bytes, like the Byte8 overload, so lane 7's 0x80 is not sign-extended into bits 8..31 when widened to Int below.
+ return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7)); // OR the eight single-bit lanes into one scalar.
+ }
+ else // Otherwise emit a call to the Ice SignMask intrinsic with x as its only argument.
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result));
+ }
}
RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
@@ -5679,14 +5745,22 @@
RValue<Int> SignMask(RValue<Int4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics) // Emulated path: gather the sign bit of 32-bit lane i into bit i of the returned Int (i = 0..3) without the SignMask intrinsic.
+ {
+ Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008); // The signed shift by 31 fills each lane with its sign bit; the mask then leaves only bit i in lane i.
+ return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3); // OR the four single-bit lanes into one scalar.
+ }
+ else // Otherwise emit a call to the Ice SignMask intrinsic with x as its only argument.
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result));
+ }
}
RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
@@ -6592,14 +6666,22 @@
RValue<Int> SignMask(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics) // Emulated path: gather the sign bit of float lane i into bit i of the returned Int (i = 0..3) without the SignMask intrinsic.
+ {
+ Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008); // Treat the lanes as Int4 so the top bit can be shifted as an integer sign bit; the mask then leaves only bit i in lane i.
+ return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3); // OR the four single-bit lanes into one scalar.
+ }
+ else // Otherwise emit a call to the Ice SignMask intrinsic with x as its only argument.
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result));
+ }
}
RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)