blob: ddf2d5630d7de5202c4476e4fac109ebc3de13cf [file] [log] [blame]
// Copyright 2022 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "SIMD.hpp"
#include "Assert.hpp"
#include "Debug.hpp"
#include "Print.hpp"
#include <cmath>
namespace rr {
SIMD::Int::Int()
: XYZW(this)
{
}
SIMD::Int::Int(RValue<SIMD::Float> cast)
: XYZW(this)
{
Value *xyzw = Nucleus::createFPToSI(cast.value(), SIMD::Int::type());
storeValue(xyzw);
}
SIMD::Int::Int(int broadcast)
: XYZW(this)
{
std::vector<int64_t> constantVector = { broadcast };
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::Int::Int(int x, int y, int z, int w)
: XYZW(this)
{
std::vector<int64_t> constantVector = { x, y, z, w };
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::Int::Int(std::vector<int> v)
: XYZW(this)
{
std::vector<int64_t> constantVector;
for(int i : v) { constantVector.push_back(i); }
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::Int::Int(RValue<SIMD::Int> rhs)
: XYZW(this)
{
store(rhs);
}
SIMD::Int::Int(const SIMD::Int &rhs)
: XYZW(this)
{
store(rhs.load());
}
SIMD::Int::Int(const Reference<SIMD::Int> &rhs)
: XYZW(this)
{
store(rhs.load());
}
SIMD::Int::Int(RValue<SIMD::UInt> rhs)
: XYZW(this)
{
storeValue(rhs.value());
}
SIMD::Int::Int(const SIMD::UInt &rhs)
: XYZW(this)
{
storeValue(rhs.loadValue());
}
SIMD::Int::Int(const Reference<SIMD::UInt> &rhs)
: XYZW(this)
{
storeValue(rhs.loadValue());
}
SIMD::Int::Int(const scalar::Int &rhs)
: XYZW(this)
{
*this = RValue<scalar::Int>(rhs.loadValue());
}
SIMD::Int::Int(const Reference<scalar::Int> &rhs)
: XYZW(this)
{
*this = RValue<scalar::Int>(rhs.loadValue());
}
RValue<SIMD::Int> SIMD::Int::operator=(int x)
{
return *this = SIMD::Int(x);
}
RValue<SIMD::Int> SIMD::Int::operator=(RValue<SIMD::Int> rhs)
{
return store(rhs);
}
RValue<SIMD::Int> SIMD::Int::operator=(const SIMD::Int &rhs)
{
return store(rhs.load());
}
RValue<SIMD::Int> SIMD::Int::operator=(const Reference<SIMD::Int> &rhs)
{
return store(rhs.load());
}
RValue<SIMD::Int> operator+(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createAdd(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator-(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createSub(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator*(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createMul(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator/(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createSDiv(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator%(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createSRem(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator&(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createAnd(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator|(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createOr(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator^(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createXor(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createShl(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
{
return RValue<SIMD::Int>(Nucleus::createAShr(lhs.value(), rhs.value()));
}
RValue<SIMD::Int> operator+=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
{
return lhs = lhs + rhs;
}
RValue<SIMD::Int> operator-=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
{
return lhs = lhs - rhs;
}
RValue<SIMD::Int> operator*=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
{
return lhs = lhs * rhs;
}
// RValue<SIMD::Int> operator/=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
// {
// return lhs = lhs / rhs;
// }
// RValue<SIMD::Int> operator%=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
// {
// return lhs = lhs % rhs;
// }
RValue<SIMD::Int> operator&=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
{
return lhs = lhs & rhs;
}
RValue<SIMD::Int> operator|=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
{
return lhs = lhs | rhs;
}
RValue<SIMD::Int> operator^=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
{
return lhs = lhs ^ rhs;
}
RValue<SIMD::Int> operator<<=(SIMD::Int &lhs, unsigned char rhs)
{
return lhs = lhs << rhs;
}
RValue<SIMD::Int> operator>>=(SIMD::Int &lhs, unsigned char rhs)
{
return lhs = lhs >> rhs;
}
RValue<SIMD::Int> operator+(RValue<SIMD::Int> val)
{
return val;
}
RValue<SIMD::Int> operator-(RValue<SIMD::Int> val)
{
return RValue<SIMD::Int>(Nucleus::createNeg(val.value()));
}
RValue<SIMD::Int> operator~(RValue<SIMD::Int> val)
{
return RValue<SIMD::Int>(Nucleus::createNot(val.value()));
}
RValue<scalar::Int> Extract(RValue<SIMD::Int> x, int i)
{
return RValue<scalar::Int>(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i));
}
RValue<SIMD::Int> Insert(RValue<SIMD::Int> x, RValue<scalar::Int> element, int i)
{
return RValue<SIMD::Int>(Nucleus::createInsertElement(x.value(), element.value(), i));
}
SIMD::UInt::UInt()
: XYZW(this)
{
}
SIMD::UInt::UInt(int broadcast)
: XYZW(this)
{
std::vector<int64_t> constantVector = { broadcast };
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::UInt::UInt(int x, int y, int z, int w)
: XYZW(this)
{
std::vector<int64_t> constantVector = { x, y, z, w };
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::UInt::UInt(std::vector<int> v)
: XYZW(this)
{
std::vector<int64_t> constantVector;
for(int i : v) { constantVector.push_back(i); }
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::UInt::UInt(RValue<SIMD::UInt> rhs)
: XYZW(this)
{
store(rhs);
}
SIMD::UInt::UInt(const SIMD::UInt &rhs)
: XYZW(this)
{
store(rhs.load());
}
SIMD::UInt::UInt(const Reference<SIMD::UInt> &rhs)
: XYZW(this)
{
store(rhs.load());
}
SIMD::UInt::UInt(RValue<SIMD::Int> rhs)
: XYZW(this)
{
storeValue(rhs.value());
}
SIMD::UInt::UInt(const SIMD::Int &rhs)
: XYZW(this)
{
storeValue(rhs.loadValue());
}
SIMD::UInt::UInt(const Reference<SIMD::Int> &rhs)
: XYZW(this)
{
storeValue(rhs.loadValue());
}
SIMD::UInt::UInt(const scalar::UInt &rhs)
: XYZW(this)
{
*this = RValue<scalar::UInt>(rhs.loadValue());
}
SIMD::UInt::UInt(const Reference<scalar::UInt> &rhs)
: XYZW(this)
{
*this = RValue<scalar::UInt>(rhs.loadValue());
}
RValue<SIMD::UInt> SIMD::UInt::operator=(RValue<SIMD::UInt> rhs)
{
return store(rhs);
}
RValue<SIMD::UInt> SIMD::UInt::operator=(const SIMD::UInt &rhs)
{
return store(rhs.load());
}
RValue<SIMD::UInt> SIMD::UInt::operator=(const Reference<SIMD::UInt> &rhs)
{
return store(rhs.load());
}
RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createAdd(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createSub(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator*(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createMul(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator/(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createUDiv(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator%(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createURem(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator&(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createAnd(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator|(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createOr(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator^(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createXor(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createShl(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
{
return RValue<SIMD::UInt>(Nucleus::createLShr(lhs.value(), rhs.value()));
}
RValue<SIMD::UInt> operator+=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
{
return lhs = lhs + rhs;
}
RValue<SIMD::UInt> operator-=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
{
return lhs = lhs - rhs;
}
RValue<SIMD::UInt> operator*=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
{
return lhs = lhs * rhs;
}
// RValue<SIMD::UInt> operator/=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
// {
// return lhs = lhs / rhs;
// }
// RValue<SIMD::UInt> operator%=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
// {
// return lhs = lhs % rhs;
// }
RValue<SIMD::UInt> operator&=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
{
return lhs = lhs & rhs;
}
RValue<SIMD::UInt> operator|=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
{
return lhs = lhs | rhs;
}
RValue<SIMD::UInt> operator^=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
{
return lhs = lhs ^ rhs;
}
RValue<SIMD::UInt> operator<<=(SIMD::UInt &lhs, unsigned char rhs)
{
return lhs = lhs << rhs;
}
RValue<SIMD::UInt> operator>>=(SIMD::UInt &lhs, unsigned char rhs)
{
return lhs = lhs >> rhs;
}
RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> val)
{
return val;
}
RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> val)
{
return RValue<SIMD::UInt>(Nucleus::createNeg(val.value()));
}
RValue<SIMD::UInt> operator~(RValue<SIMD::UInt> val)
{
return RValue<SIMD::UInt>(Nucleus::createNot(val.value()));
}
RValue<scalar::UInt> Extract(RValue<SIMD::UInt> x, int i)
{
return RValue<scalar::UInt>(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i));
}
RValue<SIMD::UInt> Insert(RValue<SIMD::UInt> x, RValue<scalar::UInt> element, int i)
{
return RValue<SIMD::UInt>(Nucleus::createInsertElement(x.value(), element.value(), i));
}
SIMD::Float::Float(RValue<SIMD::Int> cast)
: XYZW(this)
{
Value *xyzw = Nucleus::createSIToFP(cast.value(), SIMD::Float::type());
storeValue(xyzw);
}
SIMD::Float::Float(RValue<SIMD::UInt> cast)
: XYZW(this)
{
RValue<SIMD::Float> result = SIMD::Float(SIMD::Int(cast & SIMD::UInt(0x7FFFFFFF))) +
As<SIMD::Float>((As<SIMD::Int>(cast) >> 31) & As<SIMD::Int>(SIMD::Float(0x80000000u)));
storeValue(result.value());
}
SIMD::Float::Float()
: XYZW(this)
{
}
SIMD::Float::Float(float broadcast)
: XYZW(this)
{
// See rr::Float(float) constructor for the rationale behind this assert.
ASSERT(std::isfinite(broadcast));
std::vector<double> constantVector = { broadcast };
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::Float::Float(float x, float y, float z, float w)
: XYZW(this)
{
std::vector<double> constantVector = { x, y, z, w };
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::Float::Float(std::vector<float> v)
: XYZW(this)
{
std::vector<double> constantVector;
for(int f : v) { constantVector.push_back(f); }
storeValue(Nucleus::createConstantVector(constantVector, type()));
}
SIMD::Float SIMD::Float::infinity()
{
SIMD::Float result;
constexpr double inf = std::numeric_limits<double>::infinity();
std::vector<double> constantVector = { inf };
result.storeValue(Nucleus::createConstantVector(constantVector, type()));
return result;
}
SIMD::Float::Float(RValue<SIMD::Float> rhs)
: XYZW(this)
{
store(rhs);
}
SIMD::Float::Float(const SIMD::Float &rhs)
: XYZW(this)
{
store(rhs.load());
}
SIMD::Float::Float(const Reference<SIMD::Float> &rhs)
: XYZW(this)
{
store(rhs.load());
}
SIMD::Float::Float(const scalar::Float &rhs)
: XYZW(this)
{
*this = RValue<scalar::Float>(rhs.loadValue());
}
SIMD::Float::Float(const Reference<scalar::Float> &rhs)
: XYZW(this)
{
*this = RValue<scalar::Float>(rhs.loadValue());
}
SIMD::Float::Float(RValue<packed::Float4> rhs)
: XYZW(this)
{
ASSERT(SIMD::Width == 4);
*this = Insert128(*this, rhs, 0);
}
RValue<SIMD::Float> SIMD::Float::operator=(RValue<packed::Float4> rhs)
{
return *this = SIMD::Float(rhs);
}
RValue<SIMD::Float> SIMD::Float::operator=(float x)
{
return *this = SIMD::Float(x);
}
RValue<SIMD::Float> SIMD::Float::operator=(RValue<SIMD::Float> rhs)
{
return store(rhs);
}
RValue<SIMD::Float> SIMD::Float::operator=(const SIMD::Float &rhs)
{
return store(rhs.load());
}
RValue<SIMD::Float> SIMD::Float::operator=(const Reference<SIMD::Float> &rhs)
{
return store(rhs.load());
}
RValue<SIMD::Float> SIMD::Float::operator=(RValue<scalar::Float> rhs)
{
return *this = SIMD::Float(rhs);
}
RValue<SIMD::Float> SIMD::Float::operator=(const scalar::Float &rhs)
{
return *this = SIMD::Float(rhs);
}
RValue<SIMD::Float> SIMD::Float::operator=(const Reference<scalar::Float> &rhs)
{
return *this = SIMD::Float(rhs);
}
RValue<SIMD::Float> operator+(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
{
return RValue<SIMD::Float>(Nucleus::createFAdd(lhs.value(), rhs.value()));
}
RValue<SIMD::Float> operator-(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
{
return RValue<SIMD::Float>(Nucleus::createFSub(lhs.value(), rhs.value()));
}
RValue<SIMD::Float> operator*(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
{
return RValue<SIMD::Float>(Nucleus::createFMul(lhs.value(), rhs.value()));
}
RValue<SIMD::Float> operator/(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
{
return RValue<SIMD::Float>(Nucleus::createFDiv(lhs.value(), rhs.value()));
}
RValue<SIMD::Float> operator+=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
{
return lhs = lhs + rhs;
}
RValue<SIMD::Float> operator-=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
{
return lhs = lhs - rhs;
}
RValue<SIMD::Float> operator*=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
{
return lhs = lhs * rhs;
}
RValue<SIMD::Float> operator/=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
{
return lhs = lhs / rhs;
}
RValue<SIMD::Float> operator%=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
{
return lhs = lhs % rhs;
}
RValue<SIMD::Float> operator+(RValue<SIMD::Float> val)
{
return val;
}
RValue<SIMD::Float> operator-(RValue<SIMD::Float> val)
{
return RValue<SIMD::Float>(Nucleus::createFNeg(val.value()));
}
RValue<SIMD::Float> Rcp(RValue<SIMD::Float> x, bool relaxedPrecision, bool exactAtPow2)
{
ASSERT(SIMD::Width == 4);
return SIMD::Float(Rcp(Extract128(x, 0), relaxedPrecision, exactAtPow2));
}
RValue<SIMD::Float> RcpSqrt(RValue<SIMD::Float> x, bool relaxedPrecision)
{
ASSERT(SIMD::Width == 4);
return SIMD::Float(RcpSqrt(Extract128(x, 0), relaxedPrecision));
}
RValue<SIMD::Float> Insert(RValue<SIMD::Float> x, RValue<scalar::Float> element, int i)
{
return RValue<SIMD::Float>(Nucleus::createInsertElement(x.value(), element.value(), i));
}
RValue<scalar::Float> Extract(RValue<SIMD::Float> x, int i)
{
return RValue<scalar::Float>(Nucleus::createExtractElement(x.value(), scalar::Float::type(), i));
}
RValue<SIMD::Int> IsInf(RValue<SIMD::Float> x)
{
return CmpEQ(As<SIMD::Int>(x) & SIMD::Int(0x7FFFFFFF), SIMD::Int(0x7F800000));
}
RValue<SIMD::Int> IsNan(RValue<SIMD::Float> x)
{
return ~CmpEQ(x, x);
}
RValue<SIMD::Float> Sin(RValue<SIMD::Float> x)
{
return ScalarizeCall(sinf, x);
}
RValue<SIMD::Float> Cos(RValue<SIMD::Float> x)
{
return ScalarizeCall(cosf, x);
}
RValue<SIMD::Float> Tan(RValue<SIMD::Float> x)
{
return ScalarizeCall(tanf, x);
}
RValue<SIMD::Float> Asin(RValue<SIMD::Float> x)
{
return ScalarizeCall(asinf, x);
}
RValue<SIMD::Float> Acos(RValue<SIMD::Float> x)
{
return ScalarizeCall(acosf, x);
}
RValue<SIMD::Float> Atan(RValue<SIMD::Float> x)
{
return ScalarizeCall(atanf, x);
}
RValue<SIMD::Float> Sinh(RValue<SIMD::Float> x)
{
return ScalarizeCall(sinhf, x);
}
RValue<SIMD::Float> Cosh(RValue<SIMD::Float> x)
{
return ScalarizeCall(coshf, x);
}
RValue<SIMD::Float> Tanh(RValue<SIMD::Float> x)
{
return ScalarizeCall(tanhf, x);
}
RValue<SIMD::Float> Asinh(RValue<SIMD::Float> x)
{
return ScalarizeCall(asinhf, x);
}
RValue<SIMD::Float> Acosh(RValue<SIMD::Float> x)
{
return ScalarizeCall(acoshf, x);
}
RValue<SIMD::Float> Atanh(RValue<SIMD::Float> x)
{
return ScalarizeCall(atanhf, x);
}
RValue<SIMD::Float> Atan2(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
{
return ScalarizeCall(atan2f, x, y);
}
RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
{
return ScalarizeCall(powf, x, y);
}
RValue<SIMD::Float> Exp(RValue<SIMD::Float> x)
{
return ScalarizeCall(expf, x);
}
RValue<SIMD::Float> Log(RValue<SIMD::Float> x)
{
return ScalarizeCall(logf, x);
}
RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x)
{
return ScalarizeCall(exp2f, x);
}
RValue<SIMD::Float> Log2(RValue<SIMD::Float> x)
{
return ScalarizeCall(log2f, x);
}
RValue<Int> SignMask(RValue<SIMD::Int> x)
{
ASSERT(SIMD::Width == 4);
return SignMask(Extract128(x, 0));
}
RValue<SIMD::UInt> Ctlz(RValue<SIMD::UInt> x, bool isZeroUndef)
{
ASSERT(SIMD::Width == 4);
SIMD::UInt result;
return Insert128(result, Ctlz(Extract128(x, 0), isZeroUndef), 0);
}
RValue<SIMD::UInt> Cttz(RValue<SIMD::UInt> x, bool isZeroUndef)
{
ASSERT(SIMD::Width == 4);
SIMD::UInt result;
return Insert128(result, Cttz(Extract128(x, 0), isZeroUndef), 0);
}
RValue<SIMD::Int> MulHigh(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
{
ASSERT(SIMD::Width == 4);
SIMD::Int result;
return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
}
RValue<SIMD::UInt> MulHigh(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
{
ASSERT(SIMD::Width == 4);
SIMD::UInt result;
return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
}
RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools)
{
ASSERT(SIMD::Width == 4);
return AnyTrue(Extract128(bools, 0));
}
RValue<Bool> AnyFalse(const RValue<SIMD::Int> &bools)
{
ASSERT(SIMD::Width == 4);
return AnyFalse(Extract128(bools, 0));
}
RValue<Bool> Divergent(const RValue<SIMD::Int> &ints)
{
ASSERT(SIMD::Width == 4);
return Divergent(Extract128(ints, 0));
}
RValue<SIMD::Int> Swizzle(RValue<SIMD::Int> x, uint16_t select)
{
ASSERT(SIMD::Width == 4);
SIMD::Int result;
return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
}
RValue<SIMD::UInt> Swizzle(RValue<SIMD::UInt> x, uint16_t select)
{
ASSERT(SIMD::Width == 4);
SIMD::UInt result;
return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
}
RValue<SIMD::Float> Swizzle(RValue<SIMD::Float> x, uint16_t select)
{
ASSERT(SIMD::Width == 4);
SIMD::Float result;
return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
}
RValue<SIMD::Int> Shuffle(RValue<SIMD::Int> x, RValue<SIMD::Int> y, uint16_t select)
{
ASSERT(SIMD::Width == 4);
SIMD::Int result;
return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
}
RValue<SIMD::UInt> Shuffle(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y, uint16_t select)
{
ASSERT(SIMD::Width == 4);
SIMD::UInt result;
return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
}
RValue<SIMD::Float> Shuffle(RValue<SIMD::Float> x, RValue<SIMD::Float> y, uint16_t select)
{
ASSERT(SIMD::Width == 4);
SIMD::Float result;
return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
}
SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, rr::Int limit)
: base(base)
, dynamicLimit(limit)
, staticLimit(0)
, dynamicOffsets(0)
, staticOffsets(SIMD::Width)
, hasDynamicLimit(true)
, hasDynamicOffsets(false)
, isBasePlusOffset(true)
{}
SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, unsigned int limit)
: base(base)
, dynamicLimit(0)
, staticLimit(limit)
, dynamicOffsets(0)
, staticOffsets(SIMD::Width)
, hasDynamicLimit(false)
, hasDynamicOffsets(false)
, isBasePlusOffset(true)
{}
SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
: base(base)
, dynamicLimit(limit)
, staticLimit(0)
, dynamicOffsets(offset)
, staticOffsets(SIMD::Width)
, hasDynamicLimit(true)
, hasDynamicOffsets(true)
, isBasePlusOffset(true)
{}
SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
: base(base)
, dynamicLimit(0)
, staticLimit(limit)
, dynamicOffsets(offset)
, staticOffsets(SIMD::Width)
, hasDynamicLimit(false)
, hasDynamicOffsets(true)
, isBasePlusOffset(true)
{}
SIMD::Pointer::Pointer(std::vector<scalar::Pointer<Byte>> pointers)
: pointers(pointers)
, isBasePlusOffset(false)
{}
SIMD::Pointer::Pointer(SIMD::UInt cast)
: pointers(SIMD::Width)
, isBasePlusOffset(false)
{
assert(sizeof(void *) == 4);
for(int i = 0; i < SIMD::Width; i++)
{
pointers[i] = As<rr::Pointer<Byte>>(Extract(cast, i));
}
}
SIMD::Pointer::Pointer(SIMD::UInt castLow, SIMD::UInt castHigh)
: pointers(SIMD::Width)
, isBasePlusOffset(false)
{
assert(sizeof(void *) == 8);
for(int i = 0; i < SIMD::Width; i++)
{
UInt2 address;
address = Insert(address, Extract(castLow, i), 0);
address = Insert(address, Extract(castHigh, i), 1);
pointers[i] = As<rr::Pointer<Byte>>(address);
}
}
SIMD::Pointer &SIMD::Pointer::operator+=(SIMD::Int i)
{
if(isBasePlusOffset)
{
dynamicOffsets += i;
hasDynamicOffsets = true;
}
else
{
for(int el = 0; el < SIMD::Width; el++) { pointers[el] += Extract(i, el); }
}
return *this;
}
SIMD::Pointer SIMD::Pointer::operator+(SIMD::Int i)
{
SIMD::Pointer p = *this;
p += i;
return p;
}
SIMD::Pointer &SIMD::Pointer::operator+=(int i)
{
if(isBasePlusOffset)
{
for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
}
else
{
for(int el = 0; el < SIMD::Width; el++) { pointers[el] += i; }
}
return *this;
}
SIMD::Pointer SIMD::Pointer::operator+(int i)
{
SIMD::Pointer p = *this;
p += i;
return p;
}
SIMD::Int SIMD::Pointer::offsets() const
{
ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
return dynamicOffsets + SIMD::Int(staticOffsets);
}
SIMD::Int SIMD::Pointer::isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
{
ASSERT(accessSize > 0);
if(isStaticallyInBounds(accessSize, robustness))
{
return SIMD::Int(0xFFFFFFFF);
}
if(!hasDynamicOffsets && !hasDynamicLimit)
{
ASSERT(SIMD::Width == 4);
// Common fast paths.
return SIMD::Int(
(staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
(staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
(staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
(staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0);
}
return CmpGE(offsets(), 0) & CmpLT(offsets() + SIMD::Int(accessSize - 1), limit());
}
bool SIMD::Pointer::isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
{
if(hasDynamicOffsets)
{
return false;
}
if(hasDynamicLimit)
{
if(hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
{
switch(robustness)
{
case OutOfBoundsBehavior::UndefinedBehavior:
// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
// but since it can't know in advance which branches are taken this must be true even for inactives lanes.
return true;
case OutOfBoundsBehavior::Nullify:
case OutOfBoundsBehavior::RobustBufferAccess:
case OutOfBoundsBehavior::UndefinedValue:
return false;
}
}
}
for(int i = 0; i < SIMD::Width; i++)
{
if(staticOffsets[i] + accessSize - 1 >= staticLimit)
{
return false;
}
}
return true;
}
SIMD::Int SIMD::Pointer::limit() const
{
return dynamicLimit + staticLimit;
}
// Returns true if all offsets are compile-time static and sequential
// (N+0*step, N+1*step, N+2*step, N+3*step)
bool SIMD::Pointer::hasStaticSequentialOffsets(unsigned int step) const
{
ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
if(hasDynamicOffsets)
{
return false;
}
for(int i = 1; i < SIMD::Width; i++)
{
if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i])
{
return false;
}
}
return true;
}
// Returns true if all offsets are compile-time static and equal
// (N, N, N, N)
bool SIMD::Pointer::hasStaticEqualOffsets() const
{
ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
if(hasDynamicOffsets)
{
return false;
}
for(int i = 1; i < SIMD::Width; i++)
{
if(staticOffsets[0] != staticOffsets[i])
{
return false;
}
}
return true;
}
scalar::Pointer<Byte> SIMD::Pointer::getUniformPointer() const
{
#ifndef NDEBUG
if(isBasePlusOffset)
{
SIMD::Int uniform = offsets();
scalar::Int x = Extract(uniform, 0);
for(int i = 1; i < SIMD::Width; i++)
{
Assert(x == Extract(uniform, i));
}
}
else
{
for(int i = 1; i < SIMD::Width; i++)
{
Assert(pointers[0] == pointers[i]);
}
}
#endif
return getPointerForLane(0);
}
scalar::Pointer<Byte> SIMD::Pointer::getPointerForLane(int lane) const
{
if(isBasePlusOffset)
{
return base + Extract(offsets(), lane);
}
else
{
return pointers[lane];
}
}
void SIMD::Pointer::castTo(SIMD::UInt &bits) const
{
assert(sizeof(void *) == 4);
for(int i = 0; i < SIMD::Width; i++)
{
bits = Insert(bits, As<scalar::UInt>(pointers[i]), i);
}
}
void SIMD::Pointer::castTo(SIMD::UInt &lowerBits, SIMD::UInt &upperBits) const
{
assert(sizeof(void *) == 8);
for(int i = 0; i < SIMD::Width; i++)
{
UInt2 address = As<UInt2>(pointers[i]);
lowerBits = Insert(lowerBits, Extract(address, 0), i);
upperBits = Insert(upperBits, Extract(address, 1), i);
}
}
SIMD::Pointer SIMD::Pointer::IfThenElse(SIMD::Int condition, const SIMD::Pointer &lhs, const SIMD::Pointer &rhs)
{
std::vector<scalar::Pointer<Byte>> pointers(SIMD::Width);
for(int i = 0; i < SIMD::Width; i++)
{
If(Extract(condition, i) != 0)
{
pointers[i] = lhs.getPointerForLane(i);
}
Else
{
pointers[i] = rhs.getPointerForLane(i);
}
}
return { pointers };
}
#ifdef ENABLE_RR_PRINT
std::vector<rr::Value *> SIMD::Pointer::getPrintValues() const
{
if(isBasePlusOffset)
{
return PrintValue::vals(base, offsets());
}
else
{
std::vector<Value *> vals;
for(int i = 0; i < SIMD::Width; i++)
{
vals.push_back(RValue<scalar::Pointer<Byte>>(pointers[i]).value());
}
return vals;
}
}
#endif
} // namespace rr