Support LLVM 8+ with Reactor

LLVM 8 removes several x86 SSE intrinsics for saturating arithmetic, but
provides target-independent replacements. Note that there are now three
code paths for these vector operations:
- With LLVM 8+, use the new target-independent intrinsics.
- With LLVM 7 on x86, use the SSE intrinsics.
- With LLVM 7 on other architectures, lower to a generic sequence of
  vector operations (lowerPSAT).

Bug: b/139412871
Change-Id: I849f8b5c004849c628507d6377779df170a11d93
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36788
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 546ac7f..29b18a9 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -309,8 +309,14 @@
 	// settings and no Reactor routine directly links against another.
 	class JITRoutine : public rr::Routine
 	{
+#if LLVM_VERSION_MAJOR >= 8
+		using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
+		using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
+#else
 		using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
 		using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
+#endif
+
 	public:
 		JITRoutine(
 				std::unique_ptr<llvm::Module> module,
@@ -611,7 +617,7 @@
 		return jit->builder->CreateCall(trunc, ARGS(x));
 	}
 
-	// Packed add/sub saturatation
+	// Packed add/sub with saturation
 	llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
 	{
 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
@@ -646,26 +652,6 @@
 		return jit->builder->CreateTrunc(res, ty);
 	}
 
-	llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
-	{
-		return lowerPSAT(x, y, true, false);
-	}
-
-	llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
-	{
-		return lowerPSAT(x, y, true, true);
-	}
-
-	llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
-	{
-		return lowerPSAT(x, y, false, false);
-	}
-
-	llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
-	{
-		return lowerPSAT(x, y, false, true);
-	}
-
 	llvm::Value *lowerSQRT(llvm::Value *x)
 	{
 		llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
@@ -817,6 +803,44 @@
 	}
 #endif  // !defined(__i386__) && !defined(__x86_64__)
 
+#if (LLVM_VERSION_MAJOR >= 8) || (!defined(__i386__) && !defined(__x86_64__))
+	llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
+	{
+		#if LLVM_VERSION_MAJOR >= 8
+			return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::uadd_sat, x, y);
+		#else
+			return lowerPSAT(x, y, true, false);
+		#endif
+	}
+
+	llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
+	{
+		#if LLVM_VERSION_MAJOR >= 8
+			return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::sadd_sat, x, y);
+		#else
+			return lowerPSAT(x, y, true, true);
+		#endif
+	}
+
+	llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
+	{
+		#if LLVM_VERSION_MAJOR >= 8
+			return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::usub_sat, x, y);
+		#else
+			return lowerPSAT(x, y, false, false);
+		#endif
+	}
+
+	llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
+	{
+		#if LLVM_VERSION_MAJOR >= 8
+			return jit->builder->CreateBinaryIntrinsic(llvm::Intrinsic::ssub_sat, x, y);
+		#else
+			return lowerPSAT(x, y, false, true);
+		#endif
+	}
+#endif  // (LLVM_VERSION_MAJOR >= 8) || (!defined(__i386__) && !defined(__x86_64__))
+
 	llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
 	{
 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
@@ -4095,58 +4119,90 @@
 
 		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
 		{
-			llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_w);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_w);
 
-			return As<Short4>(V(jit->builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
+				return As<Short4>(V(jit->builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
 		{
-			llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_w);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_w);
 
-			return As<Short4>(V(jit->builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
+				return As<Short4>(V(jit->builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
 		{
-			llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_w);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_w);
 
-			return As<UShort4>(V(jit->builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
+				return As<UShort4>(V(jit->builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
 		{
-			llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_w);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_w);
 
-			return As<UShort4>(V(jit->builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
+				return As<UShort4>(V(jit->builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
 		{
-			llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_b);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_b);
 
-			return As<SByte8>(V(jit->builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
+				return As<SByte8>(V(jit->builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
 		{
-			llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_b);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_b);
 
-			return As<SByte8>(V(jit->builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
+				return As<SByte8>(V(jit->builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
 		{
-			llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_b);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_b);
 
-			return As<Byte8>(V(jit->builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
+				return As<Byte8>(V(jit->builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
 		{
-			llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_b);
+			#if LLVM_VERSION_MAJOR >= 8
+				return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
+			#else
+				llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_b);
 
-			return As<Byte8>(V(jit->builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
+				return As<Byte8>(V(jit->builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
+			#endif
 		}
 
 		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)