Fix rr::RoundIntClamped() for architectures other than x86 and ARM

Previously we assumed that non-x86 architectures return clamped results for out-of-range inputs when casting floating-point numbers to integers. This is true at least for ARM, but not for RISC-V and possibly others.

An implementation which depends on LLVM's fptosi_sat intrinsic has been added, which requires a recent version of LLVM. When it is not available, we fall back to explicitly clamping both the upper and lower bound.

Bug: b/217573066
Bug: b/165000222
Change-Id: Ia5f1f6de395f52852044c7e1f875c1164b18f09b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/62728
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>

commit: 4f504b04291bd47fa7b246f7ed3a59da041a074b [log] [tgz]
author: Nicolas Capens <capn@google.com> Thu Feb 10 14:13:50 2022 -0500
committer: Nicolas Capens <nicolascapens@google.com> Thu Feb 17 14:26:14 2022 +0000
tree: 3d111e32742a48157d445a9df7fb48855604d999
parent: 4228bb95b5b56f6b0f9ded5c7910bbe773a4c9d2 [diff]
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 8f564c5..6273115 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp

@@ -2720,15 +2720,31 @@
 RValue<Int4> RoundIntClamped(RValue<Float4> cast)
 {
 	RR_DEBUG_INFO_UPDATE_LOC();
+
+// TODO(b/165000222): Check if fptosi_sat produces optimal code for x86 and ARM.
 #if defined(__i386__) || defined(__x86_64__)
 	// cvtps2dq produces 0x80000000, a negative value, for input larger than
 	// 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
 	// saturate to 0x80000000.
 	return x86::cvtps2dq(Min(cast, Float4(0x7FFFFF80)));
-#else
+#elif defined(__arm__) || defined(__aarch64__)
 	// ARM saturates to the largest positive or negative integer. Unit tests
 	// verify that lowerRoundInt() behaves as desired.
 	return As<Int4>(V(lowerRoundInt(V(cast.value()), T(Int4::type()))));
+#elif LLVM_VERSION_MAJOR >= 14
+	// Apply lowerRound() to the input first, then convert the result with
+	// LLVM's saturating fptosi_sat intrinsic.
+	llvm::Value *rounded = lowerRound(V(cast.value()));
+	llvm::Function *fptosi_sat = llvm::Intrinsic::getDeclaration(
+	    jit->module.get(), llvm::Intrinsic::fptosi_sat, { T(Int4::type()), T(Float4::type()) });
+	return RValue<Int4>(V(jit->builder->CreateCall(fptosi_sat, { rounded })));
+#else
+	// No saturating conversion is known to be available here, so clamp both
+	// bounds explicitly. The lower bound is a float literal on purpose: the
+	// integer literal 0x80000000 has an unsigned type and would convert to
+	// +2147483648.0 instead of -2147483648.0.
+	RValue<Float4> clamped = Max(Min(cast, Float4(0x7FFFFF80)), Float4(-2147483648.0f));
+	return As<Int4>(V(lowerRoundInt(V(clamped.value()), T(Int4::type()))));
 #endif
 }
commit	4f504b04291bd47fa7b246f7ed3a59da041a074b	[log] [tgz]
author	Nicolas Capens <capn@google.com>	Thu Feb 10 14:13:50 2022 -0500
committer	Nicolas Capens <nicolascapens@google.com>	Thu Feb 17 14:26:14 2022 +0000
tree	3d111e32742a48157d445a9df7fb48855604d999
parent	4228bb95b5b56f6b0f9ded5c7910bbe773a4c9d2 [diff]