Emit SSE2 compatible operations if SSE4.1 is not supported.
Bug swiftshader:20
Change-Id: I67818bfe10cb29211559fb2ee047f6bec6ce46d4
Reviewed-on: https://swiftshader-review.googlesource.com/8451
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index bd23e35..07c486f 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3311,7 +3311,7 @@
{
if(saturate)
{
- if(true) // SSE 4.1
+ if(CPUID::SSE4_1)
{
Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
*this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
@@ -4074,11 +4074,12 @@
RValue<Int> RoundInt(RValue<Float> cast)
{
- RValue<Float> rounded = Round(cast);
-
Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
- ::basicBlock->appendInst(round);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ nearbyint->addArg(cast.value);
+ ::basicBlock->appendInst(nearbyint);
return RValue<Int>(V(result));
}
@@ -5229,11 +5230,12 @@
RValue<Int4> RoundInt(RValue<Float4> cast)
{
- RValue<Float4> rounded = Round(cast);
-
Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
- auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
- ::basicBlock->appendInst(round);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ nearbyint->addArg(cast.value);
+ ::basicBlock->appendInst(nearbyint);
return RValue<Int4>(V(result));
}
@@ -5573,15 +5575,28 @@
RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pack->addArg(x.value);
- pack->addArg(y.value);
- ::basicBlock->appendInst(pack);
+ if(CPUID::SSE4_1)
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pack->addArg(x.value);
+ pack->addArg(y.value);
+ ::basicBlock->appendInst(pack);
- return RValue<UShort8>(V(result));
+ return RValue<UShort8>(V(result));
+ }
+ else
+ {
+ RValue<Int4> sx = As<Int4>(x);
+ RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
+
+ RValue<Int4> sy = As<Int4>(y);
+ RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
+
+ return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
+ }
}
Type *UInt4::getType()
@@ -6162,59 +6177,96 @@
RValue<Float4> Round(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- round->addArg(x.value);
- round->addArg(::context->getConstantInt32(0));
- ::basicBlock->appendInst(round);
+ if(CPUID::SSE4_1)
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ round->addArg(x.value);
+ round->addArg(::context->getConstantInt32(0));
+ ::basicBlock->appendInst(round);
- return RValue<Float4>(V(result));
+ return RValue<Float4>(V(result));
+ }
+ else
+ {
+ return Float4(RoundInt(x));
+ }
}
RValue<Float4> Trunc(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- round->addArg(x.value);
- round->addArg(::context->getConstantInt32(3));
- ::basicBlock->appendInst(round);
+ if(CPUID::SSE4_1)
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ round->addArg(x.value);
+ round->addArg(::context->getConstantInt32(3));
+ ::basicBlock->appendInst(round);
- return RValue<Float4>(V(result));
+ return RValue<Float4>(V(result));
+ }
+ else
+ {
+ return Float4(Int4(x));
+ }
}
RValue<Float4> Frac(RValue<Float4> x)
{
- return x - Floor(x);
+ if(CPUID::SSE4_1)
+ {
+ return x - Floor(x);
+ }
+ else
+ {
+ Float4 frc = x - Float4(Int4(x)); // Signed fractional part
+
+ return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+ }
}
RValue<Float4> Floor(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- round->addArg(x.value);
- round->addArg(::context->getConstantInt32(1));
- ::basicBlock->appendInst(round);
+ if(CPUID::SSE4_1)
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ round->addArg(x.value);
+ round->addArg(::context->getConstantInt32(1));
+ ::basicBlock->appendInst(round);
- return RValue<Float4>(V(result));
+ return RValue<Float4>(V(result));
+ }
+ else
+ {
+ return x - Frac(x);
+ }
}
RValue<Float4> Ceil(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- round->addArg(x.value);
- round->addArg(::context->getConstantInt32(2));
- ::basicBlock->appendInst(round);
+ if(CPUID::SSE4_1)
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ round->addArg(x.value);
+ round->addArg(::context->getConstantInt32(2));
+ ::basicBlock->appendInst(round);
- return RValue<Float4>(V(result));
+ return RValue<Float4>(V(result));
+ }
+ else
+ {
+ return -Floor(-x);
+ }
}
Type *Float4::getType()