Emulate SignMask intrinsics.

Bug b/37496809

Change-Id: I34d0d511bd50890c36d292ab431115f5d2e65a58
Reviewed-on: https://swiftshader-review.googlesource.com/9492
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp index 3ed9d50..172f064 100644 --- a/src/Reactor/SubzeroReactor.cpp +++ b/src/Reactor/SubzeroReactor.cpp
@@ -2710,6 +2710,16 @@ return RValue<Byte8>(Nucleus::createNot(val.value)); } + RValue<Byte> Extract(RValue<Byte8> val, int i) + { + return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i)); + } + + RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i) + { + return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i)); + } + RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) { Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8); @@ -2760,16 +2770,64 @@ return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE)); } + RValue<SByte> Extract(RValue<SByte8> val, int i) + { + return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i)); + } + + RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i) + { + return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i)); + } + + RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs) + { + if(emulateIntrinsics) + { + SByte8 result; + result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0); + result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1); + result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2); + result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3); + result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4); + result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5); + result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6); + result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7); + + return result; + } + else + { + #if defined(__i386__) || defined(__x86_64__) + // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine. 
+ RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00); + RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8); + + return As<SByte8>(hi | lo); + #else + return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs)))); + #endif + } + } + RValue<Int> SignMask(RValue<Byte8> x) { - Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); - const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; - auto target = ::context->getConstantUndef(Ice::IceType_i32); - auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); - movmsk->addArg(x.value); - ::basicBlock->appendInst(movmsk); + if(emulateIntrinsics) + { + Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); + return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7)); + } + else + { + Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); + const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; + auto target = ::context->getConstantUndef(Ice::IceType_i32); + auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); + movmsk->addArg(x.value); + ::basicBlock->appendInst(movmsk); - return RValue<Int>(V(result)); + return RValue<Int>(V(result)); + } } // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y) @@ -2991,14 +3049,22 @@ RValue<Int> SignMask(RValue<SByte8> x) { - Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); - const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, 
Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; - auto target = ::context->getConstantUndef(Ice::IceType_i32); - auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); - movmsk->addArg(x.value); - ::basicBlock->appendInst(movmsk); + if(emulateIntrinsics) + { + SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); + return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7)); + } + else + { + Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); + const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; + auto target = ::context->getConstantUndef(Ice::IceType_i32); + auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); + movmsk->addArg(x.value); + ::basicBlock->appendInst(movmsk); - return RValue<Int>(V(result)); + return RValue<Int>(V(result)); + } } RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y) @@ -5679,14 +5745,22 @@ RValue<Int> SignMask(RValue<Int4> x) { - Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); - const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; - auto target = ::context->getConstantUndef(Ice::IceType_i32); - auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); - movmsk->addArg(x.value); - ::basicBlock->appendInst(movmsk); + if(emulateIntrinsics) + { + Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008); + return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3); + } + else + { + Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); + const 
Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; + auto target = ::context->getConstantUndef(Ice::IceType_i32); + auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); + movmsk->addArg(x.value); + ::basicBlock->appendInst(movmsk); - return RValue<Int>(V(result)); + return RValue<Int>(V(result)); + } } RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select) @@ -6592,14 +6666,22 @@ RValue<Int> SignMask(RValue<Float4> x) { - Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); - const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; - auto target = ::context->getConstantUndef(Ice::IceType_i32); - auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); - movmsk->addArg(x.value); - ::basicBlock->appendInst(movmsk); + if(emulateIntrinsics) + { + Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008); + return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3); + } + else + { + Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32); + const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; + auto target = ::context->getConstantUndef(Ice::IceType_i32); + auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic); + movmsk->addArg(x.value); + ::basicBlock->appendInst(movmsk); - return RValue<Int>(V(result)); + return RValue<Int>(V(result)); + } } RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)