Emulate rounding to the nearest integer.

This implementation works by adding a large value which makes the fractional part no longer fit in the mantissa, and then subtracting it again. It matches nearbyint() for values up to 2^22, positive or negative.

The 'magic number' of 0x00C00000 is derived by first observing that the integer values 0x00800000 to 0x00FFFFFF can be represented exactly in single-precision floating-point format but can't have a fractional part because there are 24 mantissa bits (the top one being hidden). So when adding 0x00800000 to, for example, 0.6, it forces the hardware to round it to the nearest representable integer, being 0x00800001. Subtracting 0x00800000 again gives us 1.0. This works for rounding any value from 0.0 to 0x007FFFFF. However, it doesn't work for negative values, because the intermediate result would be less than 0x00800000 and thus leave some room for fractional bits in the mantissa. The solution is to use 0x00C00000 instead so the range gets split between positive and negative values.

Note that values greater than the upper bound will still round to integers, but not the nearest ones, while values less than the lower bound can result in fractional values.

Bug b/37495485

Change-Id: I1aed2d831269fcf21b8d3313856a9b9756a532ef
Reviewed-on: https://swiftshader-review.googlesource.com/9488
Reviewed-by: Nicolas Capens <capn@google.com>
Reviewed-by: Corentin Wallez <cwallez@google.com>
Tested-by: Nicolas Capens <capn@google.com>

commit: f7b758899c8a260de81bf9fdfea70cb4d7a9816b [log] [tgz]
author: Nicolas Capens <capn@google.com> Wed Apr 26 09:30:47 2017 -0400
committer: Nicolas Capens <capn@google.com> Tue May 02 14:02:21 2017 +0000
tree: c4c7af1b7a9a04bea2b06913198cfbad1267b524
parent: ac6e751fe68241019df1e5da149a7fb9ffc6a05d [diff] [blame]
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 5222ddb..c1d7cb9 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp

@@ -77,6 +77,7 @@
 	class CPUID
 	{
 	public:
+		const static bool ARM;
 		const static bool SSE4_1;
 
 	private:
@@ -96,6 +97,17 @@
 			#endif
 		}
 
+		static bool detectARM()
+		{
+			#if defined(__arm__)
+				return true;
+			#elif defined(__i386__) || defined(__x86_64__)
+				return false;
+			#else
+				#error "Unknown architecture"
+			#endif
+		}
+
 		static bool detectSSE4_1()
 		{
 			#if defined(__i386__) || defined(__x86_64__)
@@ -108,7 +120,9 @@
 		}
 	};
 
+	const bool CPUID::ARM = CPUID::detectARM();
 	const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
+	const bool emulateIntrinsics = CPUID::ARM;
 }
 
 namespace sw
@@ -4146,14 +4160,22 @@
 
 	RValue<Int> RoundInt(RValue<Float> cast)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
-		nearbyint->addArg(cast.value);
-		::basicBlock->appendInst(nearbyint);
+		if(emulateIntrinsics)
+		{
+			// Push the fractional part off the mantissa. Accurate up to +/-2^22.
+			return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
+		}
+		else
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+			nearbyint->addArg(cast.value);
+			::basicBlock->appendInst(nearbyint);
 
-		return RValue<Int>(V(result));
+			return RValue<Int>(V(result));
+		}
 	}
 
 	Type *Int::getType()
@@ -5301,14 +5323,22 @@
 
 	RValue<Int4> RoundInt(RValue<Float4> cast)
 	{
-		Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
-		const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
-		auto target = ::context->getConstantUndef(Ice::IceType_i32);
-		auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
-		nearbyint->addArg(cast.value);
-		::basicBlock->appendInst(nearbyint);
+		if(emulateIntrinsics)
+		{
+			// Push the fractional part off the mantissa. Accurate up to +/-2^22.
+			return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
+		}
+		else
+		{
+			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
+			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+			auto target = ::context->getConstantUndef(Ice::IceType_i32);
+			auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+			nearbyint->addArg(cast.value);
+			::basicBlock->appendInst(nearbyint);
 
-		return RValue<Int4>(V(result));
+			return RValue<Int4>(V(result));
+		}
 	}
 
 	RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
@@ -6247,7 +6277,12 @@
 
 	RValue<Float4> Round(RValue<Float4> x)
 	{
-		if(CPUID::SSE4_1)
+		if(emulateIntrinsics)
+		{
+			// Push the fractional part off the mantissa. Accurate up to +/-2^22.
+			return (x + Float4(0x00C00000)) - Float4(0x00C00000);
+		}
+		else if(CPUID::SSE4_1)
 		{
 			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
 			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
commit	f7b758899c8a260de81bf9fdfea70cb4d7a9816b	[log] [tgz]
author	Nicolas Capens <capn@google.com>	Wed Apr 26 09:30:47 2017 -0400
committer	Nicolas Capens <capn@google.com>	Tue May 02 14:02:21 2017 +0000
tree	c4c7af1b7a9a04bea2b06913198cfbad1267b524
parent	ac6e751fe68241019df1e5da149a7fb9ffc6a05d [diff] [blame]