src/Reactor/EmulatedIntrinsics.cpp - SwiftShader - Git at Google

 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "EmulatedIntrinsics.hpp"

 #include <algorithm>
 #include <cmath>
 #include <functional>
 #include <mutex>
 #include <utility>

 namespace rr {
 namespace {

 template<typename T>
 struct UnderlyingType
 {
 	using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
 };

 // Shorthand for UnderlyingType<T>::Type.
 template<typename T>
 using UnderlyingTypeT = typename UnderlyingType<T>::Type;

 // Applies a one-argument function to lanes 0..3 of x, one Call per lane, and returns
 // the four results packed into a T.
 template<typename Func, typename T>
 RValue<T> call4(Func func, const RValue<T> &x)
 {
 	T assembled;
 	for(int lane = 0; lane < 4; lane++)
 	{
 		assembled = Insert(assembled, Call(func, Extract(x, lane)), lane);
 	}
 	return assembled;
 }

 // Applies a two-argument function to matching lanes 0..3 of x and y, one Call per lane,
 // and returns the four results packed into a T.
 template<typename Func, typename T>
 RValue<T> call4(Func func, const RValue<T> &x, const RValue<T> &y)
 {
 	T assembled;
 	for(int lane = 0; lane < 4; lane++)
 	{
 		assembled = Insert(assembled, Call(func, Extract(x, lane), Extract(y, lane)), lane);
 	}
 	return assembled;
 }

 template<typename T, typename EL = UnderlyingTypeT<T>>
 void gather(T &out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
 {
 	constexpr bool atomic = false;
 	constexpr std::memory_order order = std::memory_order_relaxed;

 	Pointer<Byte> baseBytePtr = base;

 	out = T(0);
 	for(int i = 0; i < 4; i++)
 	{
 		If(Extract(mask, i) != 0)
 		{
 			auto offset = Extract(offsets, i);
 			auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
 			out = Insert(out, el, i);
 		}
 		Else If(zeroMaskedLanes)
 		{
 			out = Insert(out, EL(0), i);
 		}
 	}
 }

 // Masked scatter. For each of the 4 lanes whose mask element is non-zero, stores that lane
 // of val (non-atomically, with the given alignment) to base plus the lane's offset.
 // Lanes whose mask element is zero are not written.
 template<typename T, typename EL = UnderlyingTypeT<T>>
 void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
 {
 	constexpr bool isAtomic = false;
 	constexpr std::memory_order memoryOrder = std::memory_order_relaxed;

 	// The offsets index a byte pointer, so they are applied in bytes rather than in elements.
 	Pointer<Byte> bytes = base;

 	for(int lane = 0; lane < 4; lane++)
 	{
 		If(Extract(mask, lane) != 0)
 		{
 			auto byteOffset = Extract(offsets, lane);
 			Store(Extract(val, lane), Pointer<EL>(&bytes[byteOffset]), alignment, isAtomic, memoryOrder);
 		}
 	}
 }

 // TODO(b/148276653): Both atomicMin and atomicMax use a static (global) mutex that makes all min
 // operations for a given T mutually exclusive, rather than only the ones on the value pointed to
 // by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android.
 // TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.
 // Emulated atomic minimum: replaces *ptr with the smaller of *ptr and value, and returns
 // the value *ptr held before the update.
 // The read-modify-write runs under a function-local static mutex. That mutex is distinct
 // for every instantiation of this template and is not shared with atomicMax.
 template<typename T>
 static T atomicMin(T *ptr, T value)
 {
 	static std::mutex guard;
 	std::lock_guard<std::mutex> hold(guard);

 	const T previous = *ptr;
 	*ptr = (value < previous) ? value : previous;
 	return previous;
 }
 // Emulated atomic maximum: replaces *ptr with the larger of *ptr and value, and returns
 // the value *ptr held before the update.
 // The read-modify-write runs under a function-local static mutex. That mutex is distinct
 // for every instantiation of this template and is not shared with atomicMin.
 template<typename T>
 static T atomicMax(T *ptr, T value)
 {
 	static std::mutex guard;
 	std::lock_guard<std::mutex> hold(guard);

 	const T previous = *ptr;
 	*ptr = (previous < value) ? value : previous;
 	return previous;
 }

 }  // anonymous namespace

 namespace emulated {

 // Float gather: declares a Float4, lets the file-local gather helper fill it from base
 // using the per-lane offsets and mask, and returns it.
 RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
 {
 	Float4 gathered{};
 	gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
 	return gathered;
 }

 // Int gather: declares an Int4, lets the file-local gather helper fill it from base
 // using the per-lane offsets and mask, and returns it.
 RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
 {
 	Int4 gathered{};
 	gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
 	return gathered;
 }

 // Float scatter: forwards all arguments unchanged to the file-local scatter helper, which
 // stores each lane of val whose mask element is non-zero at base plus that lane's offset.
 void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
 {
 	scatter(base, val, offsets, mask, alignment);
 }

 // Int scatter: forwards all arguments unchanged to the file-local scatter helper, naming
 // Int4 explicitly as the helper's vector type. The helper stores each lane of val whose
 // mask element is non-zero at base plus that lane's offset.
 void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
 {
 	scatter<Int4>(base, val, offsets, mask, alignment);
 }

 // Scalar 2^x, evaluated by a Call to the C library's exp2f.
 RValue<Float> Exp2(RValue<Float> x)
 {
 	RValue<Float> result = Call(exp2f, x);
 	return result;
 }

 // Scalar base-2 logarithm, evaluated by a Call to the C library's log2f.
 RValue<Float> Log2(RValue<Float> x)
 {
 	RValue<Float> result = Call(log2f, x);
 	return result;
 }

 // Per-lane sine: call4 passes each of the four lanes of x to sinf.
 RValue<Float4> Sin(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(sinf, x);
 	return lanes;
 }

 // Per-lane cosine: call4 passes each of the four lanes of x to cosf.
 RValue<Float4> Cos(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(cosf, x);
 	return lanes;
 }

 // Per-lane tangent: call4 passes each of the four lanes of x to tanf.
 RValue<Float4> Tan(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(tanf, x);
 	return lanes;
 }

 // Per-lane arc sine: call4 passes each of the four lanes of x to asinf.
 RValue<Float4> Asin(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(asinf, x);
 	return lanes;
 }

 // Per-lane arc cosine: call4 passes each of the four lanes of x to acosf.
 RValue<Float4> Acos(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(acosf, x);
 	return lanes;
 }

 // Per-lane arc tangent: call4 passes each of the four lanes of x to atanf.
 RValue<Float4> Atan(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(atanf, x);
 	return lanes;
 }

 // Hyperbolic sine, computed as 0.5 * (Exp(x) - Exp(-x)) with emulated::Exp.
 RValue<Float4> Sinh(RValue<Float4> x)
 {
 	// TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
 	RValue<Float4> expPositive = emulated::Exp(x);
 	RValue<Float4> expNegative = emulated::Exp(-x);
 	return Float4(0.5f) * (expPositive - expNegative);
 }

 // Hyperbolic cosine, computed as 0.5 * (Exp(x) + Exp(-x)) with emulated::Exp.
 RValue<Float4> Cosh(RValue<Float4> x)
 {
 	// TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
 	RValue<Float4> expPositive = emulated::Exp(x);
 	RValue<Float4> expNegative = emulated::Exp(-x);
 	return Float4(0.5f) * (expPositive + expNegative);
 }

 // Per-lane hyperbolic tangent: call4 passes each of the four lanes of x to tanhf.
 RValue<Float4> Tanh(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(tanhf, x);
 	return lanes;
 }

 // Per-lane inverse hyperbolic sine: call4 passes each of the four lanes of x to asinhf.
 RValue<Float4> Asinh(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(asinhf, x);
 	return lanes;
 }

 // Per-lane inverse hyperbolic cosine: call4 passes each of the four lanes of x to acoshf.
 RValue<Float4> Acosh(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(acoshf, x);
 	return lanes;
 }

 // Per-lane inverse hyperbolic tangent: call4 passes each of the four lanes of x to atanhf.
 RValue<Float4> Atanh(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(atanhf, x);
 	return lanes;
 }

 // Per-lane two-argument arc tangent. For each lane, atan2f receives the lane of x as its
 // first argument and the lane of y as its second argument.
 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
 {
 	RValue<Float4> lanes = call4(atan2f, x, y);
 	return lanes;
 }

 // Per-lane power: for each lane, powf receives the lane of x as the base and the lane of
 // y as the exponent.
 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
 {
 	RValue<Float4> lanes = call4(powf, x, y);
 	return lanes;
 }

 // Per-lane natural exponential: call4 passes each of the four lanes of x to expf.
 RValue<Float4> Exp(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(expf, x);
 	return lanes;
 }

 // Per-lane natural logarithm: call4 passes each of the four lanes of x to logf.
 RValue<Float4> Log(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(logf, x);
 	return lanes;
 }

 // Per-lane 2^x: call4 passes each of the four lanes of x to exp2f.
 RValue<Float4> Exp2(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(exp2f, x);
 	return lanes;
 }

 // Per-lane base-2 logarithm: call4 passes each of the four lanes of x to log2f.
 RValue<Float4> Log2(RValue<Float4> x)
 {
 	RValue<Float4> lanes = call4(log2f, x);
 	return lanes;
 }

 // Signed atomic minimum: Calls the file-local atomicMin<int32_t> helper with x and y and
 // returns its result. memoryOrder is accepted but not used by this implementation.
 RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
 {
 	RValue<Int> previous = Call(atomicMin<int32_t>, x, y);
 	return previous;
 }

 // Unsigned atomic minimum: Calls the file-local atomicMin<uint32_t> helper with x and y and
 // returns its result. memoryOrder is accepted but not used by this implementation.
 RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
 {
 	RValue<UInt> previous = Call(atomicMin<uint32_t>, x, y);
 	return previous;
 }

 // Signed atomic maximum: Calls the file-local atomicMax<int32_t> helper with x and y and
 // returns its result. memoryOrder is accepted but not used by this implementation.
 RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
 {
 	RValue<Int> previous = Call(atomicMax<int32_t>, x, y);
 	return previous;
 }

 // Unsigned atomic maximum: Calls the file-local atomicMax<uint32_t> helper with x and y and
 // returns its result. memoryOrder is accepted but not used by this implementation.
 RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
 {
 	RValue<UInt> previous = Call(atomicMax<uint32_t>, x, y);
 	return previous;
 }

 // Per-lane floating-point remainder: for each lane, fmodf receives the lane of lhs as its
 // first argument and the lane of rhs as its second argument.
 RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs)
 {
 	RValue<Float4> lanes = call4(fmodf, lhs, rhs);
 	return lanes;
 }

 }  // namespace emulated
 }  // namespace rr
	// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "EmulatedIntrinsics.hpp"

	#include <algorithm>
	#include <cmath>
	#include <functional>
	#include <mutex>
	#include <utility>

	namespace rr {
	namespace {

	template<typename T>
	struct UnderlyingType
	{
	using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
	};

	// Shorthand for UnderlyingType<T>::Type.
	template<typename T>
	using UnderlyingTypeT = typename UnderlyingType<T>::Type;

	// Applies a one-argument function to lanes 0..3 of x, one Call per lane, and returns
	// the four results packed into a T.
	template<typename Func, typename T>
	RValue<T> call4(Func func, const RValue<T> &x)
	{
		T assembled;
		for(int lane = 0; lane < 4; lane++)
		{
			assembled = Insert(assembled, Call(func, Extract(x, lane)), lane);
		}
		return assembled;
	}

	// Applies a two-argument function to matching lanes 0..3 of x and y, one Call per lane,
	// and returns the four results packed into a T.
	template<typename Func, typename T>
	RValue<T> call4(Func func, const RValue<T> &x, const RValue<T> &y)
	{
		T assembled;
		for(int lane = 0; lane < 4; lane++)
		{
			assembled = Insert(assembled, Call(func, Extract(x, lane), Extract(y, lane)), lane);
		}
		return assembled;
	}

	template<typename T, typename EL = UnderlyingTypeT<T>>
	void gather(T &out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
	{
	constexpr bool atomic = false;
	constexpr std::memory_order order = std::memory_order_relaxed;

	Pointer<Byte> baseBytePtr = base;

	out = T(0);
	for(int i = 0; i < 4; i++)
	{
	If(Extract(mask, i) != 0)
	{
	auto offset = Extract(offsets, i);
	auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
	out = Insert(out, el, i);
	}
	Else If(zeroMaskedLanes)
	{
	out = Insert(out, EL(0), i);
	}
	}
	}

	// Masked scatter. For each of the 4 lanes whose mask element is non-zero, stores that lane
	// of val (non-atomically, with the given alignment) to base plus the lane's offset.
	// Lanes whose mask element is zero are not written.
	template<typename T, typename EL = UnderlyingTypeT<T>>
	void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
	{
		constexpr bool isAtomic = false;
		constexpr std::memory_order memoryOrder = std::memory_order_relaxed;

		// The offsets index a byte pointer, so they are applied in bytes rather than in elements.
		Pointer<Byte> bytes = base;

		for(int lane = 0; lane < 4; lane++)
		{
			If(Extract(mask, lane) != 0)
			{
				auto byteOffset = Extract(offsets, lane);
				Store(Extract(val, lane), Pointer<EL>(&bytes[byteOffset]), alignment, isAtomic, memoryOrder);
			}
		}
	}

	// TODO(b/148276653): Both atomicMin and atomicMax use a static (global) mutex that makes all min
	// operations for a given T mutually exclusive, rather than only the ones on the value pointed to
	// by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android.
	// TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.
	// Emulated atomic minimum: replaces *ptr with the smaller of *ptr and value, and returns
	// the value *ptr held before the update.
	// The read-modify-write runs under a function-local static mutex. That mutex is distinct
	// for every instantiation of this template and is not shared with atomicMax.
	template<typename T>
	static T atomicMin(T *ptr, T value)
	{
		static std::mutex guard;
		std::lock_guard<std::mutex> hold(guard);

		const T previous = *ptr;
		*ptr = (value < previous) ? value : previous;
		return previous;
	}
	// Emulated atomic maximum: replaces *ptr with the larger of *ptr and value, and returns
	// the value *ptr held before the update.
	// The read-modify-write runs under a function-local static mutex. That mutex is distinct
	// for every instantiation of this template and is not shared with atomicMin.
	template<typename T>
	static T atomicMax(T *ptr, T value)
	{
		static std::mutex guard;
		std::lock_guard<std::mutex> hold(guard);

		const T previous = *ptr;
		*ptr = (previous < value) ? value : previous;
		return previous;
	}

	} // anonymous namespace

	namespace emulated {

	// Float gather: declares a Float4, lets the file-local gather helper fill it from base
	// using the per-lane offsets and mask, and returns it.
	RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
	{
		Float4 gathered{};
		gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
		return gathered;
	}

	// Int gather: declares an Int4, lets the file-local gather helper fill it from base
	// using the per-lane offsets and mask, and returns it.
	RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
	{
		Int4 gathered{};
		gather(gathered, base, offsets, mask, alignment, zeroMaskedLanes);
		return gathered;
	}

	// Float scatter: forwards all arguments unchanged to the file-local scatter helper, which
	// stores each lane of val whose mask element is non-zero at base plus that lane's offset.
	void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
	{
	scatter(base, val, offsets, mask, alignment);
	}

	// Int scatter: forwards all arguments unchanged to the file-local scatter helper, naming
	// Int4 explicitly as the helper's vector type. The helper stores each lane of val whose
	// mask element is non-zero at base plus that lane's offset.
	void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
	{
	scatter<Int4>(base, val, offsets, mask, alignment);
	}

	// Scalar 2^x, evaluated by a Call to the C library's exp2f.
	RValue<Float> Exp2(RValue<Float> x)
	{
		RValue<Float> result = Call(exp2f, x);
		return result;
	}

	// Scalar base-2 logarithm, evaluated by a Call to the C library's log2f.
	RValue<Float> Log2(RValue<Float> x)
	{
		RValue<Float> result = Call(log2f, x);
		return result;
	}

	// Per-lane sine: call4 passes each of the four lanes of x to sinf.
	RValue<Float4> Sin(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(sinf, x);
		return lanes;
	}

	// Per-lane cosine: call4 passes each of the four lanes of x to cosf.
	RValue<Float4> Cos(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(cosf, x);
		return lanes;
	}

	// Per-lane tangent: call4 passes each of the four lanes of x to tanf.
	RValue<Float4> Tan(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(tanf, x);
		return lanes;
	}

	// Per-lane arc sine: call4 passes each of the four lanes of x to asinf.
	RValue<Float4> Asin(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(asinf, x);
		return lanes;
	}

	// Per-lane arc cosine: call4 passes each of the four lanes of x to acosf.
	RValue<Float4> Acos(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(acosf, x);
		return lanes;
	}

	// Per-lane arc tangent: call4 passes each of the four lanes of x to atanf.
	RValue<Float4> Atan(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(atanf, x);
		return lanes;
	}

	// Hyperbolic sine, computed as 0.5 * (Exp(x) - Exp(-x)) with emulated::Exp.
	RValue<Float4> Sinh(RValue<Float4> x)
	{
		// TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
		RValue<Float4> expPositive = emulated::Exp(x);
		RValue<Float4> expNegative = emulated::Exp(-x);
		return Float4(0.5f) * (expPositive - expNegative);
	}

	// Hyperbolic cosine, computed as 0.5 * (Exp(x) + Exp(-x)) with emulated::Exp.
	RValue<Float4> Cosh(RValue<Float4> x)
	{
		// TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
		RValue<Float4> expPositive = emulated::Exp(x);
		RValue<Float4> expNegative = emulated::Exp(-x);
		return Float4(0.5f) * (expPositive + expNegative);
	}

	// Per-lane hyperbolic tangent: call4 passes each of the four lanes of x to tanhf.
	RValue<Float4> Tanh(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(tanhf, x);
		return lanes;
	}

	// Per-lane inverse hyperbolic sine: call4 passes each of the four lanes of x to asinhf.
	RValue<Float4> Asinh(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(asinhf, x);
		return lanes;
	}

	// Per-lane inverse hyperbolic cosine: call4 passes each of the four lanes of x to acoshf.
	RValue<Float4> Acosh(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(acoshf, x);
		return lanes;
	}

	// Per-lane inverse hyperbolic tangent: call4 passes each of the four lanes of x to atanhf.
	RValue<Float4> Atanh(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(atanhf, x);
		return lanes;
	}

	// Per-lane two-argument arc tangent. For each lane, atan2f receives the lane of x as its
	// first argument and the lane of y as its second argument.
	RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
	{
		RValue<Float4> lanes = call4(atan2f, x, y);
		return lanes;
	}

	// Per-lane power: for each lane, powf receives the lane of x as the base and the lane of
	// y as the exponent.
	RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
	{
		RValue<Float4> lanes = call4(powf, x, y);
		return lanes;
	}

	// Per-lane natural exponential: call4 passes each of the four lanes of x to expf.
	RValue<Float4> Exp(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(expf, x);
		return lanes;
	}

	// Per-lane natural logarithm: call4 passes each of the four lanes of x to logf.
	RValue<Float4> Log(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(logf, x);
		return lanes;
	}

	// Per-lane 2^x: call4 passes each of the four lanes of x to exp2f.
	RValue<Float4> Exp2(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(exp2f, x);
		return lanes;
	}

	// Per-lane base-2 logarithm: call4 passes each of the four lanes of x to log2f.
	RValue<Float4> Log2(RValue<Float4> x)
	{
		RValue<Float4> lanes = call4(log2f, x);
		return lanes;
	}

	// Signed atomic minimum: Calls the file-local atomicMin<int32_t> helper with x and y and
	// returns its result. memoryOrder is accepted but not used by this implementation.
	RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
	{
		RValue<Int> previous = Call(atomicMin<int32_t>, x, y);
		return previous;
	}

	// Unsigned atomic minimum: Calls the file-local atomicMin<uint32_t> helper with x and y and
	// returns its result. memoryOrder is accepted but not used by this implementation.
	RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
	{
		RValue<UInt> previous = Call(atomicMin<uint32_t>, x, y);
		return previous;
	}

	// Signed atomic maximum: Calls the file-local atomicMax<int32_t> helper with x and y and
	// returns its result. memoryOrder is accepted but not used by this implementation.
	RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
	{
		RValue<Int> previous = Call(atomicMax<int32_t>, x, y);
		return previous;
	}

	// Unsigned atomic maximum: Calls the file-local atomicMax<uint32_t> helper with x and y and
	// returns its result. memoryOrder is accepted but not used by this implementation.
	RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
	{
		RValue<UInt> previous = Call(atomicMax<uint32_t>, x, y);
		return previous;
	}

	// Per-lane floating-point remainder: for each lane, fmodf receives the lane of lhs as its
	// first argument and the lane of rhs as its second argument.
	RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs)
	{
		RValue<Float4> lanes = call4(fmodf, lhs, rhs);
		return lanes;
	}

	} // namespace emulated
	} // namespace rr