Emit SSE2-compatible operations if SSE4.1 is not supported.

Bug swiftshader:20

Change-Id: I67818bfe10cb29211559fb2ee047f6bec6ce46d4
Reviewed-on: https://swiftshader-review.googlesource.com/8451
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index bd23e35..07c486f 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3311,7 +3311,7 @@
 	{
 		if(saturate)
 		{
-			if(true)   // SSE 4.1
+			if(CPUID::SSE4_1)
 			{
 				Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
 				*this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
@@ -4074,11 +4074,12 @@
 
 	RValue<Int> RoundInt(RValue<Float> cast)
 	{
-		RValue<Float> rounded = Round(cast);
-
 		Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
-		auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
-		::basicBlock->appendInst(round);
+		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+		auto target = ::context->getConstantUndef(Ice::IceType_i32);
+		auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+		nearbyint->addArg(cast.value);
+		::basicBlock->appendInst(nearbyint);
 
 		return RValue<Int>(V(result));
 	}
@@ -5229,11 +5230,12 @@
 
 	RValue<Int4> RoundInt(RValue<Float4> cast)
 	{
-		RValue<Float4> rounded = Round(cast);
-
 		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
-		auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
-		::basicBlock->appendInst(round);
+		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+		auto target = ::context->getConstantUndef(Ice::IceType_i32);
+		auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+		nearbyint->addArg(cast.value);
+		::basicBlock->appendInst(nearbyint);
 
 		return RValue<Int4>(V(result));
 	}
@@ -5573,15 +5575,28 @@
 
 	RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
-		pack->addArg(x.value);
-		pack->addArg(y.value);
-		::basicBlock->appendInst(pack);
+		if(CPUID::SSE4_1)
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+			pack->addArg(x.value);
+			pack->addArg(y.value);
+			::basicBlock->appendInst(pack);
 
-		return RValue<UShort8>(V(result));
+			return RValue<UShort8>(V(result));
+		}
+		else
+		{
+			RValue<Int4> sx = As<Int4>(x);
+			RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
+
+			RValue<Int4> sy = As<Int4>(y);
+			RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
+
+			return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
+		}
 	}
 
 	Type *UInt4::getType()
@@ -6162,59 +6177,96 @@
 
 	RValue<Float4> Round(RValue<Float4> x)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
-		round->addArg(x.value);
-		round->addArg(::context->getConstantInt32(0));
-		::basicBlock->appendInst(round);
+		if(CPUID::SSE4_1)
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+			round->addArg(x.value);
+			round->addArg(::context->getConstantInt32(0));
+			::basicBlock->appendInst(round);
 
-		return RValue<Float4>(V(result));
+			return RValue<Float4>(V(result));
+		}
+		else
+		{
+			return Float4(RoundInt(x));
+		}
 	}
 
 	RValue<Float4> Trunc(RValue<Float4> x)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
-		round->addArg(x.value);
-		round->addArg(::context->getConstantInt32(3));
-		::basicBlock->appendInst(round);
+		if(CPUID::SSE4_1)
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+			round->addArg(x.value);
+			round->addArg(::context->getConstantInt32(3));
+			::basicBlock->appendInst(round);
 
-		return RValue<Float4>(V(result));
+			return RValue<Float4>(V(result));
+		}
+		else
+		{
+			return Float4(Int4(x));
+		}
 	}
 
 	RValue<Float4> Frac(RValue<Float4> x)
 	{
-		return x - Floor(x);
+		if(CPUID::SSE4_1)
+		{
+			return x - Floor(x);
+		}
+		else
+		{
+			Float4 frc = x - Float4(Int4(x));   // Signed fractional part
+
+			return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+		}
 	}
 
 	RValue<Float4> Floor(RValue<Float4> x)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
-		round->addArg(x.value);
-		round->addArg(::context->getConstantInt32(1));
-		::basicBlock->appendInst(round);
+		if(CPUID::SSE4_1)
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+			round->addArg(x.value);
+			round->addArg(::context->getConstantInt32(1));
+			::basicBlock->appendInst(round);
 
-		return RValue<Float4>(V(result));
+			return RValue<Float4>(V(result));
+		}
+		else
+		{
+			return x - Frac(x);
+		}
 	}
 
 	RValue<Float4> Ceil(RValue<Float4> x)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
-		round->addArg(x.value);
-		round->addArg(::context->getConstantInt32(2));
-		::basicBlock->appendInst(round);
+		if(CPUID::SSE4_1)
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+			round->addArg(x.value);
+			round->addArg(::context->getConstantInt32(2));
+			::basicBlock->appendInst(round);
 
-		return RValue<Float4>(V(result));
+			return RValue<Float4>(V(result));
+		}
+		else
+		{
+			return -Floor(-x);
+		}
 	}
 
 	Type *Float4::getType()