Refactor vector packing.

x86 vector packing instructions always treat the input as having signed integer components, but can perform signed or unsigned saturation on the output. In Reactor the Pack() intrinsic has overloads which differentiate between them based on the signedness of the input, but this is confusing.

Also simplify emulation of saturating add/subtract.

Bug b/37496082

Change-Id: I0625fff429ffb40f42baf9600c7760d9858b5d89
Reviewed-on: https://swiftshader-review.googlesource.com/12548
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>

commit: 33438a6882d629139617d80cc7fdbc687b71a794 [log] [tgz]
author: Nicolas Capens <capn@google.com> Wed Sep 27 11:47:35 2017 -0400
committer: Nicolas Capens <nicolascapens@google.com> Wed Sep 27 19:46:15 2017 +0000
tree: 4e8c64bc7d54c9870d1f61e719bfc60d8d2afcd3
parent: e6c3aa239f28e4bb397d2abf50bfbef3ab9a7389 [diff] [blame]
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 8abb17a..59e7e09 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp

@@ -2791,7 +2791,7 @@
 	RValue<Short4> RoundShort4(RValue<Float4> cast)
 	{
 		RValue<Int4> int4 = RoundInt(cast);
-		return As<Short4>(Pack(int4, int4));
+		return As<Short4>(PackSigned(int4, int4));
 	}
 
 	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
@@ -2824,13 +2824,20 @@
 		return x86::pmaddwd(x, y);
 	}
 
-	RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
+	RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
 	{
 		auto result = x86::packsswb(x, y);
 
 		return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
 	}
 
+	RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
+	{
+		auto result = x86::packuswb(x, y);
+
+		return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
+	}
+
 	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
 	{
 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
@@ -2899,7 +2906,7 @@
 			if(CPUID::supportsSSE4_1())
 			{
 				Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
-				*this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
+				*this = As<Short4>(PackUnsigned(int4, int4));
 			}
 			else
 			{
@@ -3093,13 +3100,6 @@
 		return x86::pavgw(x, y);
 	}
 
-	RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
-	{
-		auto result = x86::packuswb(x, y);
-
-		return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
-	}
-
 	Type *UShort4::getType()
 	{
 		return T(Type_v4i16);
@@ -4846,11 +4846,16 @@
 		return x86::cvtps2dq(cast);
 	}
 
-	RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
+	RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
 	{
 		return x86::packssdw(x, y);
 	}
 
+	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
+	{
+		return x86::packusdw(x, y);
+	}
+
 	RValue<Int> Extract(RValue<Int4> x, int i)
 	{
 		return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
@@ -5180,11 +5185,6 @@
 		}
 	}
 
-	RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
-	{
-		return x86::packusdw(As<Int4>(x), As<Int4>(y));
-	}
-
 	Type *UInt4::getType()
 	{
 		return T(llvm::VectorType::get(T(UInt::getType()), 4));
@@ -6205,7 +6205,7 @@
 			return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
 		}
 
-		RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
+		RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
 		{
 			llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
commit	33438a6882d629139617d80cc7fdbc687b71a794	[log] [tgz]
author	Nicolas Capens <capn@google.com>	Wed Sep 27 11:47:35 2017 -0400
committer	Nicolas Capens <nicolascapens@google.com>	Wed Sep 27 19:46:15 2017 +0000
tree	4e8c64bc7d54c9870d1f61e719bfc60d8d2afcd3
parent	e6c3aa239f28e4bb397d2abf50bfbef3ab9a7389 [diff] [blame]