Fix rr::RoundIntClamped() for architectures other than x86 and ARM
Previously we assumed that non-x86 architectures return clamped results
when casting out-of-range floating-point numbers to integers.
This is true at least for ARM, but not for RISC-V and possibly others.
An implementation based on LLVM's fptosi_sat intrinsic has been added for
builds using LLVM 14 or later. When it is not available, we fall back to
explicitly clamping both the upper and lower bound.
Bug: b/217573066
Bug: b/165000222
Change-Id: Ia5f1f6de395f52852044c7e1f875c1164b18f09b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/62728
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 8f564c5..6273115 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2720,15 +2720,33 @@
RValue<Int4> RoundIntClamped(RValue<Float4> cast)
{
RR_DEBUG_INFO_UPDATE_LOC();
+
+// Converts each float lane to an integer, clamping out-of-range inputs
+// instead of returning a target-dependent result.
+// TODO(b/165000222): Check if fptosi_sat produces optimal code for x86 and ARM.
#if defined(__i386__) || defined(__x86_64__)
// cvtps2dq produces 0x80000000, a negative value, for input larger than
// 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
// saturate to 0x80000000.
return x86::cvtps2dq(Min(cast, Float4(0x7FFFFF80)));
-#else
+#elif defined(__arm__) || defined(__aarch64__)
// ARM saturates to the largest positive or negative integer. Unit tests
// verify that lowerRoundInt() behaves as desired.
return As<Int4>(V(lowerRoundInt(V(cast.value()), T(Int4::type()))));
+#elif LLVM_VERSION_MAJOR >= 14
+ // Convert the result of lowerRound() with the saturating fptosi_sat
+ // intrinsic, which clamps out-of-range values to the integer limits.
+ llvm::Value *rounded = lowerRound(V(cast.value()));
+ llvm::Function *fptosi_sat = llvm::Intrinsic::getDeclaration(
+ jit->module.get(), llvm::Intrinsic::fptosi_sat, { T(Int4::type()), T(Float4::type()) });
+ return RValue<Int4>(V(jit->builder->CreateCall(fptosi_sat, { rounded })));
+#else
+ // No saturating conversion is available, so clamp both bounds explicitly
+ // before converting. The lower bound is a float literal on purpose:
+ // 0x80000000 is an unsigned literal and would convert to +2147483648.0,
+ // making Max() return that value for every input.
+ RValue<Float4> clamped = Max(Min(cast, Float4(0x7FFFFF80)), Float4(-2147483648.0f));
+ return As<Int4>(V(lowerRoundInt(V(clamped.value()), T(Int4::type()))));
#endif
}