Reactor: Fix Floor generic code generation
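This commit fixes Floor() generic LLVM code generation.
It adds a lowerFloor() helper that emits a call to the llvm.floor intrinsic, and uses it for Floor(Float) and Floor(Float4) in the preprocessor branch that does not use x86::floorss/x86::floorps.
The JIT symbol lookup now first consults a new ExternalFunctionSymbolResolver, which maps "floorf" to the host floorf, before falling back to objLayer.findSymbol().
Frac() only checks CPUID::supportsSSE4_1() when building for __i386__ or __x86_64__.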
Bug: b/115344057
Test: dEQP-GLES3.functional.shaders.builtin_functions.precision.floor
Change-Id: I70ec0babfe9778b7963296734f1901a73b1f696f
Reviewed-on: https://swiftshader-review.googlesource.com/20929
Tested-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 6bc889e..a7aa1ad 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -65,6 +65,8 @@
#define ARGS(...) {__VA_ARGS__}
#define CreateCall2 CreateCall
#define CreateCall3 CreateCall
+
+ #include <unordered_map>
#endif
#include "x86.hpp"
@@ -80,6 +82,8 @@
#include <xmmintrin.h>
#endif
+#include <math.h>
+
#if defined(__x86_64__) && defined(_WIN32)
extern "C" void X86CompilationCallback()
{
@@ -179,6 +183,13 @@
return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
}
+ llvm::Value *lowerFloor(llvm::Value *x)
+ {
+ llvm::Function *floor = llvm::Intrinsic::getDeclaration(
+ ::module, llvm::Intrinsic::floor, {x->getType()});
+ return ::builder->CreateCall(floor, ARGS(x));
+ }
+
// Packed add/sub saturatation
llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
{
@@ -501,6 +512,25 @@
}
};
#else
+ class ExternalFunctionSymbolResolver
+ {
+ private:
+ using FunctionMap = std::unordered_map<std::string, void *>;
+ FunctionMap func_;
+
+ public:
+ ExternalFunctionSymbolResolver()
+ {
+ func_.emplace("floorf", reinterpret_cast<void*>(floorf));
+ }
+
+ void *findSymbol(const std::string &name) const
+ {
+ FunctionMap::const_iterator it = func_.find(name);
+ return (it != func_.end()) ? it->second : nullptr;
+ }
+ };
+
class LLVMReactorJIT
{
private:
@@ -508,6 +538,7 @@
using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
llvm::orc::ExecutionSession session;
+ ExternalFunctionSymbolResolver externalSymbolResolver;
std::shared_ptr<llvm::orc::SymbolResolver> resolver;
std::unique_ptr<llvm::TargetMachine> targetMachine;
const llvm::DataLayout dataLayout;
@@ -521,6 +552,13 @@
resolver(createLegacyLookupResolver(
session,
[this](const std::string &name) {
+ void *func = externalSymbolResolver.findSymbol(name);
+ if (func != nullptr)
+ {
+ return llvm::JITSymbol(
+ reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
+ }
+
return objLayer.findSymbol(name, true);
},
[](llvm::Error err) {
@@ -6247,10 +6285,12 @@
return x86::floorss(x);
}
else
-#endif
{
return Float4(Floor(Float4(x))).x;
}
+#else
+ return RValue<Float>(V(lowerFloor(V(x.value))));
+#endif
}
RValue<Float> Ceil(RValue<Float> x)
@@ -6692,11 +6732,13 @@
{
Float4 frc;
+#if defined(__i386__) || defined(__x86_64__)
if(CPUID::supportsSSE4_1())
{
frc = x - Floor(x);
}
else
+#endif
{
frc = x - Float4(Int4(x)); // Signed fractional part.
@@ -6716,10 +6758,12 @@
return x86::floorps(x);
}
else
-#endif
{
return x - Frac(x);
}
+#else
+ return RValue<Float4>(V(lowerFloor(V(x.value))));
+#endif
}
RValue<Float4> Ceil(RValue<Float4> x)