SubzeroReactor: implement most missing intrinsics
* Implement intrinsics for the Subzero backend required for
ANGLE / SwiftShader for GLES 2 and 3.
* Note that most of these intrinsics are "emulated". I've added an
rr::emulated namespace in EmulatedReactor.hpp/cpp that contains the set
of Reactor functions that are presently being emulated. These are
invoked from SubzeroReactor until we decide to implement proper
intrinsics for them in Subzero.
Bug: b/130459196
Change-Id: I01171cfa7cc45b078c3b98be6b61328eee4f35e5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38874
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ae486e9..28ff2af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1540,6 +1540,7 @@
set(SUBZERO_REACTOR_LIST
${SOURCE_DIR}/Reactor/Debug.cpp
${SOURCE_DIR}/Reactor/Debug.hpp
+ ${SOURCE_DIR}/Reactor/EmulatedReactor.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.hpp
${SOURCE_DIR}/Reactor/Nucleus.hpp
@@ -1675,6 +1676,7 @@
${SOURCE_DIR}/Reactor/CPUID.hpp
${SOURCE_DIR}/Reactor/Debug.cpp
${SOURCE_DIR}/Reactor/Debug.hpp
+ ${SOURCE_DIR}/Reactor/EmulatedReactor.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.cpp
${SOURCE_DIR}/Reactor/ExecutableMemory.hpp
${SOURCE_DIR}/Reactor/LLVMReactor.cpp
diff --git a/src/Reactor/BUILD.gn b/src/Reactor/BUILD.gn
index b472d81..1d958e7 100644
--- a/src/Reactor/BUILD.gn
+++ b/src/Reactor/BUILD.gn
@@ -47,6 +47,7 @@
swiftshader_source_set("swiftshader_reactor_base") {
sources = [
"Debug.cpp",
+ "EmulatedReactor.cpp",
"ExecutableMemory.cpp",
"Reactor.cpp",
]
diff --git a/src/Reactor/EmulatedReactor.cpp b/src/Reactor/EmulatedReactor.cpp
new file mode 100644
index 0000000..8a06d6f
--- /dev/null
+++ b/src/Reactor/EmulatedReactor.cpp
@@ -0,0 +1,213 @@
+#include "EmulatedReactor.hpp"
+
+#include <cmath>
+#include <functional>
+#include <utility>
+
+namespace rr
+{
+ namespace
+ {
+ template <typename T>
+ struct UnderlyingType // Trait: Type is the rvalue_underlying_type of whatever rr::Extract returns for an RValue<T>
+ {
+ using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type; // unevaluated; only the type is inspected
+ };
+
+ template <typename T>
+ using UnderlyingTypeT = typename UnderlyingType<T>::Type; // Shorthand for UnderlyingType<T>::Type
+
+ // Applies the one-argument function `func` to each of the four lanes of x (via rr::Call)
+ template <typename Func, typename T>
+ RValue<T> call4(Func func, const RValue<T>& x)
+ {
+     T result;
+     for (int lane = 0; lane < 4; lane++)
+     {
+         result = Insert(result, Call(func, Extract(x, lane)), lane);
+     }
+     return result;
+ }
+
+ // Applies the two-argument function `func` to matching lanes of x and y (via rr::Call)
+ template <typename Func, typename T>
+ RValue<T> call4(Func func, const RValue<T>& x, const RValue<T>& y)
+ {
+     T result;
+     for (int lane = 0; lane < 4; lane++)
+     {
+         result = Insert(result, Call(func, Extract(x, lane), Extract(y, lane)), lane);
+     }
+     return result;
+ }
+
+ // Emulated masked gather. `out` is zeroed first; then, for every lane whose mask
+ // element is non-zero, one EL is loaded (non-atomic, relaxed) from `base` plus
+ // that lane's byte offset and inserted into `out`.
+ template <typename T, typename EL = UnderlyingTypeT<T>>
+ void gather(T& out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
+ {
+     Pointer<Byte> bytes = base;
+
+     out = T(0);
+     for (int lane = 0; lane < 4; lane++)
+     {
+         If(Extract(mask, lane) != 0)
+         {
+             auto byteOffset = Extract(offsets, lane);
+             auto element = Load(Pointer<EL>(&bytes[byteOffset]), alignment, /* atomic */ false, std::memory_order_relaxed);
+             out = Insert(out, element, lane);
+         }
+         Else If(zeroMaskedLanes)
+         {
+             out = Insert(out, EL(0), lane);
+         }
+     }
+ }
+
+ // Emulated masked scatter. For every lane whose mask element is non-zero, that
+ // lane of `val` is stored (non-atomic, relaxed) to `base` plus the lane's byte
+ // offset. Lanes with a zero mask element write nothing.
+ template <typename T, typename EL = UnderlyingTypeT<T>>
+ void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+     Pointer<Byte> bytes = base;
+
+     for (int lane = 0; lane < 4; lane++)
+     {
+         If(Extract(mask, lane) != 0)
+         {
+             auto byteOffset = Extract(offsets, lane);
+             Store(Extract(val, lane), Pointer<EL>(&bytes[byteOffset]), alignment, /* atomic */ false, std::memory_order_relaxed);
+         }
+     }
+ }
+ }
+
+ namespace emulated
+ {
+ RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
+ {
+     Float4 gathered{}; // filled in lane by lane by the generic gather() helper
+     gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
+     return gathered;
+ }
+
+ RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
+ {
+     Int4 gathered{}; // same per-lane emulation with Int elements
+     gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
+     return gathered;
+ }
+
+ void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+     scatter(base, val, offsets, mask, alignment); // per-lane masked store of Float elements
+ }
+
+ void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+     scatter(base, val, offsets, mask, alignment); // T is deduced as Int4 from val
+ }
+
+ RValue<Float> Exp2(RValue<Float> x)
+ {
+ return Call(exp2f, x); // scalar: a single rr::Call to the C function exp2f
+ }
+
+ RValue<Float> Log2(RValue<Float> x)
+ {
+ return Call(log2f, x); // scalar: a single rr::Call to the C function log2f
+ }
+
+ RValue<Float4> Sin(RValue<Float4> x)
+ {
+ return call4(sinf, x); // vector: sinf is called once per lane
+ }
+
+ RValue<Float4> Cos(RValue<Float4> x)
+ {
+ return call4(cosf, x); // cosf per lane
+ }
+
+ RValue<Float4> Tan(RValue<Float4> x)
+ {
+ return call4(tanf, x); // tanf per lane
+ }
+
+ RValue<Float4> Asin(RValue<Float4> x)
+ {
+ return call4(asinf, x); // asinf per lane
+ }
+
+ RValue<Float4> Acos(RValue<Float4> x)
+ {
+ return call4(acosf, x); // acosf per lane
+ }
+
+ RValue<Float4> Atan(RValue<Float4> x)
+ {
+ return call4(atanf, x); // atanf per lane
+ }
+
+ RValue<Float4> Sinh(RValue<Float4> x)
+ {
+ return call4(sinhf, x); // sinhf per lane
+ }
+
+ RValue<Float4> Cosh(RValue<Float4> x)
+ {
+ return call4(coshf, x); // coshf per lane
+ }
+
+ RValue<Float4> Tanh(RValue<Float4> x)
+ {
+ return call4(tanhf, x); // tanhf per lane
+ }
+
+ RValue<Float4> Asinh(RValue<Float4> x)
+ {
+ return call4(asinhf, x); // asinhf per lane
+ }
+
+ RValue<Float4> Acosh(RValue<Float4> x)
+ {
+ return call4(acoshf, x); // acoshf per lane
+ }
+
+ RValue<Float4> Atanh(RValue<Float4> x)
+ {
+ return call4(atanhf, x); // atanhf per lane
+ }
+
+ RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
+ {
+ return call4(atan2f, x, y); // lane i is atan2f(x[i], y[i]); arguments are forwarded in the order given
+ }
+
+ RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
+ {
+ return call4(powf, x, y); // lane i is powf(x[i], y[i])
+ }
+
+ RValue<Float4> Exp(RValue<Float4> x)
+ {
+ return call4(expf, x); // expf per lane
+ }
+
+ RValue<Float4> Log(RValue<Float4> x)
+ {
+ return call4(logf, x); // logf per lane
+ }
+
+ RValue<Float4> Exp2(RValue<Float4> x)
+ {
+ return call4(exp2f, x); // exp2f per lane
+ }
+
+ RValue<Float4> Log2(RValue<Float4> x)
+ {
+ return call4(log2f, x); // log2f per lane
+ }
+ }
+}
diff --git a/src/Reactor/EmulatedReactor.hpp b/src/Reactor/EmulatedReactor.hpp
new file mode 100644
index 0000000..dbdc198
--- /dev/null
+++ b/src/Reactor/EmulatedReactor.hpp
@@ -0,0 +1,61 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef rr_EmulatedReactor_hpp
+#define rr_EmulatedReactor_hpp
+
+#include "Reactor.hpp"
+
+// Implementations of Reactor functions that are "emulated" - that is,
+// implemented either in terms of other Reactor code, or by using rr::Call
+// to invoke C functions. These are typically slower than direct calls to
+// the JIT backend; however, they provide a good starting point when
+// implementing a new backend, or when adding the functionality to an
+// existing backend is non-trivial.
+
+namespace rr
+{
+    namespace emulated
+    {
+        // Masked gather/scatter, emulated one lane at a time. Offsets are in bytes relative to base.
+        RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
+        RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
+        void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment);
+        void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment);
+        // Scalar math, forwarded to the C library through rr::Call.
+        RValue<Float> Exp2(RValue<Float> x);
+        RValue<Float> Log2(RValue<Float> x);
+        // Vector math, evaluated by calling the scalar C function once per lane.
+        RValue<Float4> Sin(RValue<Float4> x);
+        RValue<Float4> Cos(RValue<Float4> x);
+        RValue<Float4> Tan(RValue<Float4> x);
+        RValue<Float4> Asin(RValue<Float4> x);
+        RValue<Float4> Acos(RValue<Float4> x);
+        RValue<Float4> Atan(RValue<Float4> x);
+        RValue<Float4> Sinh(RValue<Float4> x);
+        RValue<Float4> Cosh(RValue<Float4> x);
+        RValue<Float4> Tanh(RValue<Float4> x);
+        RValue<Float4> Asinh(RValue<Float4> x);
+        RValue<Float4> Acosh(RValue<Float4> x);
+        RValue<Float4> Atanh(RValue<Float4> x);
+        RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y);
+        RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y);
+        RValue<Float4> Exp(RValue<Float4> x);
+        RValue<Float4> Log(RValue<Float4> x);
+        RValue<Float4> Exp2(RValue<Float4> x);
+        RValue<Float4> Log2(RValue<Float4> x);
+    }
+}
+
+#endif // rr_EmulatedReactor_hpp
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 003716e..0360138 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -873,6 +873,55 @@
llvm::Value *mulh = jit->builder->CreateAShr(mult, intTy->getBitWidth());
return jit->builder->CreateTrunc(mulh, ty);
}
+
+ llvm::Value *createGather(llvm::Value *base, llvm::Type *elTy, llvm::Value *offsets, llvm::Value *mask, unsigned int alignment, bool zeroMaskedLanes) // Emits a call to llvm.masked.gather and returns its result (a vector of elTy)
+ {
+ ASSERT(base->getType()->isPointerTy());
+ ASSERT(offsets->getType()->isVectorTy());
+ ASSERT(mask->getType()->isVectorTy());
+
+ auto numEls = mask->getType()->getVectorNumElements(); // lane count is taken from the mask vector
+ auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
+ auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
+ auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
+ auto i8PtrTy = i8Ty->getPointerTo();
+ auto elPtrTy = elTy->getPointerTo();
+ auto elVecTy = ::llvm::VectorType::get(elTy, numEls);
+ auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
+ auto i8Base = jit->builder->CreatePointerCast(base, i8PtrTy); // view base as i8* so the offsets below are applied in bytes
+ auto i8Ptrs = jit->builder->CreateGEP(i8Base, offsets); // one byte address per lane: base + offsets[lane]
+ auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy); // reinterpret each address as a pointer to elTy
+ auto i8Mask = jit->builder->CreateIntCast(mask, ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...> (NOTE: despite the name, this is a vector of i1)
+ auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy); // value supplied for masked-off lanes: zero if requested, otherwise undef
+ auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
+ auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
+ return jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough });
+ }
+
+ void createScatter(llvm::Value *base, llvm::Value *val, llvm::Value *offsets, llvm::Value *mask, unsigned int alignment) // Emits a call to llvm.masked.scatter storing the lanes of val
+ {
+ ASSERT(base->getType()->isPointerTy());
+ ASSERT(val->getType()->isVectorTy());
+ ASSERT(offsets->getType()->isVectorTy());
+ ASSERT(mask->getType()->isVectorTy());
+
+ auto numEls = mask->getType()->getVectorNumElements(); // lane count is taken from the mask vector
+ auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
+ auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
+ auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
+ auto i8PtrTy = i8Ty->getPointerTo();
+ auto elVecTy = val->getType(); // the element type is derived from the value being stored
+ auto elTy = elVecTy->getVectorElementType();
+ auto elPtrTy = elTy->getPointerTo();
+ auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
+ auto i8Base = jit->builder->CreatePointerCast(base, i8PtrTy); // view base as i8* so the offsets below are applied in bytes
+ auto i8Ptrs = jit->builder->CreateGEP(i8Base, offsets); // one byte address per lane: base + offsets[lane]
+ auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy); // reinterpret each address as a pointer to elTy
+ auto i8Mask = jit->builder->CreateIntCast(mask, ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...> (NOTE: despite the name, this is a vector of i1)
+ auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
+ auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
+ jit->builder->CreateCall(func, { val, elPtrs, align, i8Mask });
+ }
}
namespace rr
@@ -1751,53 +1800,24 @@
jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask });
}
- Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
+ RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{
- ASSERT(V(base)->getType()->isPointerTy());
- ASSERT(V(offsets)->getType()->isVectorTy());
- ASSERT(V(mask)->getType()->isVectorTy());
-
- auto numEls = V(mask)->getType()->getVectorNumElements();
- auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
- auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
- auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
- auto i8PtrTy = i8Ty->getPointerTo();
- auto elPtrTy = T(elTy)->getPointerTo();
- auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
- auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
- auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
- auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
- auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
- auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
- auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
- auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
- auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
- return V(jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
+ return As<Float4>(V(createGather(V(base.value), T(Float::getType()), V(offsets.value), V(mask.value), alignment, zeroMaskedLanes))); // unwrap to LLVM values, gather Float elements via the file-local helper, rewrap as Float4
}
- void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
+ RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
{
- ASSERT(V(base)->getType()->isPointerTy());
- ASSERT(V(val)->getType()->isVectorTy());
- ASSERT(V(offsets)->getType()->isVectorTy());
- ASSERT(V(mask)->getType()->isVectorTy());
+ return As<Int4>(V(createGather(V(base.value), T(Int::getType()), V(offsets.value), V(mask.value), alignment, zeroMaskedLanes))); // element type must be Int to match Pointer<Int>, as the removed Reactor.cpp overload passed
+ }
- auto numEls = V(mask)->getType()->getVectorNumElements();
- auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
- auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
- auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
- auto i8PtrTy = i8Ty->getPointerTo();
- auto elVecTy = V(val)->getType();
- auto elTy = elVecTy->getVectorElementType();
- auto elPtrTy = elTy->getPointerTo();
- auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
- auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
- auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
- auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
- auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
- auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
- auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
- jit->builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
+ void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+ createScatter(V(base.value), V(val.value), V(offsets.value), V(mask.value), alignment); // unwrap to LLVM values and emit the masked scatter; nothing to return
+ }
+
+ void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+ createScatter(V(base.value), V(val.value), V(offsets.value), V(mask.value), alignment); // unwrap to LLVM values and emit the masked scatter; nothing to return
}
void Nucleus::createFence(std::memory_order memoryOrder)
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 9f66115..50e27ad 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -199,10 +199,6 @@
static Value *createMaskedLoad(Value *base, Type *elementType, Value *mask, unsigned int alignment, bool zeroMaskedLanes);
static void createMaskedStore(Value *base, Value *value, Value *mask, unsigned int alignment);
- // Scatter / Gather instructions
- static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes);
- static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment);
-
// Barrier instructions
static void createFence(std::memory_order memoryOrder);
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index faa8738..31ab59e 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -4327,26 +4327,6 @@
Nucleus::createMaskedStore(base.value, val.value, mask.value, alignment);
}
- RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
- {
- return RValue<Float4>(Nucleus::createGather(base.value, Float::getType(), offsets.value, mask.value, alignment, zeroMaskedLanes));
- }
-
- RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
- {
- return RValue<Int4>(Nucleus::createGather(base.value, Int::getType(), offsets.value, mask.value, alignment, zeroMaskedLanes));
- }
-
- void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
- {
- Nucleus::createScatter(base.value, val.value, offsets.value, mask.value, alignment);
- }
-
- void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
- {
- Nucleus::createScatter(base.value, val.value, offsets.value, mask.value, alignment);
- }
-
void Fence(std::memory_order memoryOrder)
{
ASSERT_MSG(memoryOrder == std::memory_order_acquire ||
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index e4b6be0..81757a9 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2316,14 +2316,14 @@
// TODO: Currently unimplemented for Subzero.
// Count leading zeros.
- // Returns 32 when: isZeroUndef && x == 0.
- // Returns an undefined value when: !isZeroUndef && x == 0.
+ // Returns 32 when: !isZeroUndef && x == 0.
+ // Returns an undefined value when: isZeroUndef && x == 0.
RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef);
RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef);
// Count trailing zeros.
- // Returns 32 when: isZeroUndef && x == 0.
- // Returns an undefined value when: !isZeroUndef && x == 0.
+ // Returns 32 when: !isZeroUndef && x == 0.
+ // Returns an undefined value when: isZeroUndef && x == 0.
RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef);
RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 60daccb..47b4b16 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -14,6 +14,7 @@
#include "Reactor.hpp"
#include "Debug.hpp"
+#include "EmulatedReactor.hpp"
#include "Optimizer.hpp"
#include "ExecutableMemory.hpp"
@@ -3560,7 +3561,6 @@
Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
{
- // FIXME: This does not currently work on Windows.
Ice::Variable *ret = nullptr;
if (retTy != nullptr)
{
@@ -3583,37 +3583,203 @@
::basicBlock->appendInst(trap);
}
- // Below are functions currently unimplemented for the Subzero backend.
- // They are stubbed to satisfy the linker.
void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; }
void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); }
- Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
- void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
- RValue<Float> Exp2(RValue<Float> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float(0); }
- RValue<Float> Log2(RValue<Float> x) { UNIMPLEMENTED("Subzero Log2()"); return Float(0); }
- RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }
- RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4(0); }
- RValue<Float4> Tan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tan()"); return Float4(0); }
- RValue<Float4> Asin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asin()"); return Float4(0); }
- RValue<Float4> Acos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acos()"); return Float4(0); }
- RValue<Float4> Atan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atan()"); return Float4(0); }
- RValue<Float4> Sinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sinh()"); return Float4(0); }
- RValue<Float4> Cosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cosh()"); return Float4(0); }
- RValue<Float4> Tanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tanh()"); return Float4(0); }
- RValue<Float4> Asinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asinh()"); return Float4(0); }
- RValue<Float4> Acosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acosh()"); return Float4(0); }
- RValue<Float4> Atanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atanh()"); return Float4(0); }
- RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Atan2()"); return Float4(0); }
- RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Pow()"); return Float4(0); }
- RValue<Float4> Exp(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp()"); return Float4(0); }
- RValue<Float4> Log(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log()"); return Float4(0); }
- RValue<Float4> Exp2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float4(0); }
- RValue<Float4> Log2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log2()"); return Float4(0); }
- RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt(0); }
- RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0); }
- RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt(0); }
- RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0); }
+
+ RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
+ {
+     return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes); // Subzero path: delegate to the rr::emulated implementation
+ }
+
+ RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
+ {
+     return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes); // Subzero path: delegate to the rr::emulated implementation
+ }
+
+ void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+     emulated::Scatter(base, val, offsets, mask, alignment); // delegate; nothing to return
+ }
+
+ void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
+ {
+     emulated::Scatter(base, val, offsets, mask, alignment); // delegate; nothing to return
+ }
+
+ RValue<Float> Exp2(RValue<Float> x)
+ {
+ return emulated::Exp2(x); // each function below forwards unchanged to its rr::emulated counterpart
+ }
+
+ RValue<Float> Log2(RValue<Float> x)
+ {
+ return emulated::Log2(x); // emulated fallback
+ }
+
+ RValue<Float4> Sin(RValue<Float4> x)
+ {
+ return emulated::Sin(x); // emulated fallback
+ }
+
+ RValue<Float4> Cos(RValue<Float4> x)
+ {
+ return emulated::Cos(x); // emulated fallback
+ }
+
+ RValue<Float4> Tan(RValue<Float4> x)
+ {
+ return emulated::Tan(x); // emulated fallback
+ }
+
+ RValue<Float4> Asin(RValue<Float4> x)
+ {
+ return emulated::Asin(x); // emulated fallback
+ }
+
+ RValue<Float4> Acos(RValue<Float4> x)
+ {
+ return emulated::Acos(x); // emulated fallback
+ }
+
+ RValue<Float4> Atan(RValue<Float4> x)
+ {
+ return emulated::Atan(x); // emulated fallback
+ }
+
+ RValue<Float4> Sinh(RValue<Float4> x)
+ {
+ return emulated::Sinh(x); // emulated fallback
+ }
+
+ RValue<Float4> Cosh(RValue<Float4> x)
+ {
+ return emulated::Cosh(x); // emulated fallback
+ }
+
+ RValue<Float4> Tanh(RValue<Float4> x)
+ {
+ return emulated::Tanh(x); // emulated fallback
+ }
+
+ RValue<Float4> Asinh(RValue<Float4> x)
+ {
+ return emulated::Asinh(x); // emulated fallback
+ }
+
+ RValue<Float4> Acosh(RValue<Float4> x)
+ {
+ return emulated::Acosh(x); // emulated fallback
+ }
+
+ RValue<Float4> Atanh(RValue<Float4> x)
+ {
+ return emulated::Atanh(x); // emulated fallback
+ }
+
+ RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
+ {
+ return emulated::Atan2(x, y); // emulated fallback; argument order is preserved
+ }
+
+ RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
+ {
+ return emulated::Pow(x, y); // emulated fallback; argument order is preserved
+ }
+
+ RValue<Float4> Exp(RValue<Float4> x)
+ {
+ return emulated::Exp(x); // emulated fallback
+ }
+
+ RValue<Float4> Log(RValue<Float4> x)
+ {
+ return emulated::Log(x); // emulated fallback
+ }
+
+ RValue<Float4> Exp2(RValue<Float4> x)
+ {
+ return emulated::Exp2(x); // emulated fallback
+ }
+
+ RValue<Float4> Log2(RValue<Float4> x)
+ {
+ return emulated::Log2(x); // emulated fallback
+ }
+
+ RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
+ {
+     if (emulateIntrinsics)
+     {
+         UNIMPLEMENTED("Subzero Ctlz()"); return UInt(0);
+     }
+     else
+     {
+         const Ice::Intrinsics::IntrinsicInfo info = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
+         Ice::Variable* leadingZeros = ::function->makeVariable(Ice::IceType_i32); // receives the intrinsic's i32 result
+         auto callee = ::context->getConstantUndef(Ice::IceType_i32); // intrinsic calls take an undef constant as their target operand here
+         auto call = Ice::InstIntrinsicCall::create(::function, 1, leadingZeros, callee, info);
+         call->addArg(x.value);
+         ::basicBlock->appendInst(call);
+         // NOTE(review): isZeroUndef is not consulted on this path - confirm Subzero's Ctlz is defined for x == 0
+         return RValue<UInt>(V(leadingZeros));
+     }
+ }
+
+ RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
+ {
+     if (emulateIntrinsics)
+     {
+         UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0);
+     }
+     else
+     {
+         // TODO: implement vectorized version in Subzero; for now apply the scalar Ctlz to each lane
+         UInt4 result;
+         for (int lane = 0; lane < 4; lane++)
+         {
+             result = Insert(result, Ctlz(Extract(x, lane), isZeroUndef), lane);
+         }
+         return result;
+     }
+ }
+
+ RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
+ {
+     if (emulateIntrinsics)
+     {
+         UNIMPLEMENTED("Subzero Cttz()"); return UInt(0);
+     }
+     else
+     {
+         const Ice::Intrinsics::IntrinsicInfo info = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
+         Ice::Variable* trailingZeros = ::function->makeVariable(Ice::IceType_i32); // receives the intrinsic's i32 result
+         auto callee = ::context->getConstantUndef(Ice::IceType_i32); // intrinsic calls take an undef constant as their target operand here
+         auto cttz = Ice::InstIntrinsicCall::create(::function, 1, trailingZeros, callee, info);
+         cttz->addArg(x.value);
+         ::basicBlock->appendInst(cttz);
+         // NOTE(review): isZeroUndef is not consulted on this path - confirm Subzero's Cttz is defined for x == 0
+         return RValue<UInt>(V(trailingZeros));
+     }
+ }
+
+ RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
+ {
+     if (emulateIntrinsics)
+     {
+         UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0);
+     }
+     else
+     {
+         // TODO: implement vectorized version in Subzero; for now apply the scalar Cttz to each lane
+         UInt4 result;
+         for (int lane = 0; lane < 4; lane++)
+         {
+             result = Insert(result, Cttz(Extract(x, lane), isZeroUndef), lane);
+         }
+         return result;
+     }
+ }
void EmitDebugLocation() {}
void EmitDebugVariable(Value* value) {}