| // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef sw_ShaderCore_hpp |
| #define sw_ShaderCore_hpp |
| |
| #include "Reactor/Print.hpp" |
| #include "Reactor/Reactor.hpp" |
| #include "System/Debug.hpp" |
| |
| #include <array> |
| #include <atomic> // std::memory_order |
| #include <utility> // std::pair |
| |
| namespace sw { |
| |
| using namespace rr; |
| |
| class Vector4s |
| { |
| public: |
| Vector4s(); |
| Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w); |
| Vector4s(const Vector4s &rhs); |
| |
| Short4 &operator[](int i); |
| Vector4s &operator=(const Vector4s &rhs); |
| |
| Short4 x; |
| Short4 y; |
| Short4 z; |
| Short4 w; |
| }; |
| |
| class Vector4f |
| { |
| public: |
| Vector4f(); |
| Vector4f(float x, float y, float z, float w); |
| Vector4f(const Vector4f &rhs); |
| |
| Float4 &operator[](int i); |
| Vector4f &operator=(const Vector4f &rhs); |
| |
| Float4 x; |
| Float4 y; |
| Float4 z; |
| Float4 w; |
| }; |
| |
| class Vector4i |
| { |
| public: |
| Vector4i(); |
| Vector4i(int x, int y, int z, int w); |
| Vector4i(const Vector4i &rhs); |
| |
| Int4 &operator[](int i); |
| Vector4i &operator=(const Vector4i &rhs); |
| |
| Int4 x; |
| Int4 y; |
| Int4 z; |
| Int4 w; |
| }; |
| |
| enum class OutOfBoundsBehavior |
| { |
| Nullify, // Loads become zero, stores are elided. |
| RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing). |
| UndefinedValue, // Only for load operations. Not secure. No program termination. |
| UndefinedBehavior, // Program may terminate. |
| }; |
| |
| // SIMD contains types that represent multiple scalars packed into a single |
| // vector data type. Types in the SIMD namespace provide a semantic hint |
| // that the data should be treated as a per-execution-lane scalar instead of |
// a typical Euclidean-style vector type.
| namespace SIMD { |
| |
| // Width is the number of per-lane scalars packed into each SIMD vector. |
| static constexpr int Width = 4; |
| |
| using Float = rr::Float4; |
| using Int = rr::Int4; |
| using UInt = rr::UInt4; |
| |
| struct Pointer |
| { |
| Pointer(rr::Pointer<Byte> base, rr::Int limit); |
| Pointer(rr::Pointer<Byte> base, unsigned int limit); |
| Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset); |
| Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset); |
| |
| Pointer &operator+=(Int i); |
| Pointer &operator*=(Int i); |
| |
| Pointer operator+(SIMD::Int i); |
| Pointer operator*(SIMD::Int i); |
| |
| Pointer &operator+=(int i); |
| Pointer &operator*=(int i); |
| |
| Pointer operator+(int i); |
| Pointer operator*(int i); |
| |
	// Returns the per-lane offsets from base.
	SIMD::Int offsets() const;
| |
	// Returns a per-lane mask which is all-ones where an access of accessSize
	// bytes at base + offset falls within bounds, and zero elsewhere.
	SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const;
| |
| bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const; |
| |
	// Returns the upper (non-inclusive) limit for offsets from base.
	rr::Int limit() const;
| |
| // Returns true if all offsets are sequential |
| // (N+0*step, N+1*step, N+2*step, N+3*step) |
| rr::Bool hasSequentialOffsets(unsigned int step) const; |
| |
	// Returns true if all offsets are compile-time static and
| // sequential (N+0*step, N+1*step, N+2*step, N+3*step) |
| bool hasStaticSequentialOffsets(unsigned int step) const; |
| |
| // Returns true if all offsets are equal (N, N, N, N) |
| rr::Bool hasEqualOffsets() const; |
| |
| // Returns true if all offsets are compile-time static and are equal |
| // (N, N, N, N) |
| bool hasStaticEqualOffsets() const; |
| |
| template<typename T> |
| inline T Load(OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float)); |
| |
| template<typename T> |
| inline void Store(T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); |
| |
| template<typename T> |
| inline void Store(RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); |
| |
| // Base address for the pointer, common across all lanes. |
| rr::Pointer<rr::Byte> base; |
| |
| // Upper (non-inclusive) limit for offsets from base. |
| rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero. |
| unsigned int staticLimit; |
| |
| // Per lane offsets from base. |
| SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero. |
| std::array<int32_t, SIMD::Width> staticOffsets; |
| |
| bool hasDynamicLimit; // True if dynamicLimit is non-zero. |
| bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero. |
| }; |
| |
| template<typename T> |
| struct Element |
| {}; |
| template<> |
| struct Element<Float> |
| { |
| using type = rr::Float; |
| }; |
| template<> |
| struct Element<Int> |
| { |
| using type = rr::Int; |
| }; |
| template<> |
| struct Element<UInt> |
| { |
| using type = rr::UInt; |
| }; |
| |
| } // namespace SIMD |
| |
| Float4 exponential2(RValue<Float4> x, bool pp = false); |
| Float4 logarithm2(RValue<Float4> x, bool pp = false); |
| Float4 exponential(RValue<Float4> x, bool pp = false); |
| Float4 logarithm(RValue<Float4> x, bool pp = false); |
| Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false); |
| Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false); |
| Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); |
| Float4 modulo(RValue<Float4> x, RValue<Float4> y); |
| Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range |
| Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range |
| Float4 sine(RValue<Float4> x, bool pp = false); |
| Float4 cosine(RValue<Float4> x, bool pp = false); |
| Float4 tangent(RValue<Float4> x, bool pp = false); |
| Float4 arccos(RValue<Float4> x, bool pp = false); |
| Float4 arcsin(RValue<Float4> x, bool pp = false); |
| Float4 arctan(RValue<Float4> x, bool pp = false); |
| Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false); |
| Float4 sineh(RValue<Float4> x, bool pp = false); |
| Float4 cosineh(RValue<Float4> x, bool pp = false); |
| Float4 tangenth(RValue<Float4> x, bool pp = false); |
| Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1 |
| Float4 arcsinh(RValue<Float4> x, bool pp = false); |
Float4 arctanh(RValue<Float4> x, bool pp = false);  // Limited to (-1, 1) range
| |
| Float4 dot2(const Vector4f &v0, const Vector4f &v1); |
| Float4 dot3(const Vector4f &v0, const Vector4f &v1); |
| Float4 dot4(const Vector4f &v0, const Vector4f &v1); |
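
// E.g. dot3 computes v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; since each
// component is a Float4, the dot product is evaluated independently per lane.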
| |
| void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); |
| void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); |
| void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
| void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
| void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
| void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
| void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
| void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); |
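
// E.g. transpose4x4 transposes the 4x4 matrix formed by its rows, in place:
// on return, row0 holds the first component of each original row, and so on.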
| |
| sw::SIMD::UInt halfToFloatBits(sw::SIMD::UInt halfBits); |
| sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits); |
| Float4 r11g11b10Unpack(UInt r11g11b10bits); |
| UInt r11g11b10Pack(const Float4 &value); |
| Vector4s a2b10g10r10Unpack(const Int4 &value); |
| Vector4s a2r10g10b10Unpack(const Int4 &value); |
| |
| rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints); |
| |
| rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints); |
| |
| template<typename T> |
| inline rr::RValue<T> AndAll(rr::RValue<T> const &mask); |
| |
| template<typename T> |
| inline rr::RValue<T> OrAll(rr::RValue<T> const &mask); |
| |
| rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val); |
| |
| // Returns the <whole, frac> of val. |
| // Both whole and frac will have the same sign as val. |
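// E.g. a lane holding 1.75 splits into <1.0, 0.75>; -1.75 splits into <-1.0, -0.75>.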
| std::pair<rr::RValue<sw::SIMD::Float>, rr::RValue<sw::SIMD::Float>> |
| Modf(rr::RValue<sw::SIMD::Float> const &val); |
| |
| // Returns the number of 1s in bits, per lane. |
| sw::SIMD::UInt CountBits(rr::RValue<sw::SIMD::UInt> const &bits); |
| |
| // Returns 1 << bits. |
// If the result overflows a 32-bit integer, 0 is returned.
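// E.g. NthBit32(0) == 1, NthBit32(5) == 32, and NthBit32(32) == 0.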
| rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits); |
| |
// Returns a mask of bitCount 1s starting from the LSB.
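// E.g. Bitmask32(3) == 0x7 and Bitmask32(0) == 0.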
| rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount); |
| |
// Performs a fused multiply-add, returning a * b + c.
| rr::RValue<sw::SIMD::Float> FMA( |
| rr::RValue<sw::SIMD::Float> const &a, |
| rr::RValue<sw::SIMD::Float> const &b, |
| rr::RValue<sw::SIMD::Float> const &c); |
| |
// Returns the exponent of the floating-point number f.
// Assumes IEEE 754 representation.
| rr::RValue<sw::SIMD::Int> Exponent(rr::RValue<sw::SIMD::Float> f); |
| |
| // Returns y if y < x; otherwise result is x. |
| // If one operand is a NaN, the other operand is the result. |
| // If both operands are NaN, the result is a NaN. |
| rr::RValue<sw::SIMD::Float> NMin(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y); |
| |
| // Returns y if y > x; otherwise result is x. |
| // If one operand is a NaN, the other operand is the result. |
| // If both operands are NaN, the result is a NaN. |
| rr::RValue<sw::SIMD::Float> NMax(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y); |
| |
| // Returns the determinant of a 2x2 matrix. |
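// For the matrix [[a, b], [c, d]] this evaluates to a * d - b * c.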
| rr::RValue<sw::SIMD::Float> Determinant( |
| rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, |
| rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d); |
| |
| // Returns the determinant of a 3x3 matrix. |
| rr::RValue<sw::SIMD::Float> Determinant( |
| rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, |
| rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, |
| rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i); |
| |
| // Returns the determinant of a 4x4 matrix. |
| rr::RValue<sw::SIMD::Float> Determinant( |
| rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, |
| rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, |
| rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, |
| rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p); |
| |
| // Returns the inverse of a 2x2 matrix. |
| std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse( |
| rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, |
| rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d); |
| |
| // Returns the inverse of a 3x3 matrix. |
| std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse( |
| rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, |
| rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, |
| rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i); |
| |
| // Returns the inverse of a 4x4 matrix. |
| std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse( |
| rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d, |
| rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, |
| rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l, |
| rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p); |
| |
| //////////////////////////////////////////////////////////////////////////// |
| // Inline functions |
| //////////////////////////////////////////////////////////////////////////// |
| |
| template<typename T> |
| inline T SIMD::Pointer::Load(OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */) |
| { |
| using EL = typename Element<T>::type; |
| |
| if(isStaticallyInBounds(sizeof(float), robustness)) |
| { |
| // All elements are statically known to be in-bounds. |
		// We can avoid the costly conditionals on the mask.
| |
| if(hasStaticSequentialOffsets(sizeof(float))) |
| { |
			// Offsets are sequential. Perform a regular load.
| return rr::Load(rr::Pointer<T>(base + staticOffsets[0]), alignment, atomic, order); |
| } |
| if(hasStaticEqualOffsets()) |
| { |
| // Load one, replicate. |
| return T(*rr::Pointer<EL>(base + staticOffsets[0], alignment)); |
| } |
| } |
| else |
| { |
| switch(robustness) |
| { |
| case OutOfBoundsBehavior::Nullify: |
| case OutOfBoundsBehavior::RobustBufferAccess: |
| case OutOfBoundsBehavior::UndefinedValue: |
| mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads. |
| break; |
| case OutOfBoundsBehavior::UndefinedBehavior: |
| // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. |
| break; |
| } |
| } |
| |
| auto offs = offsets(); |
| |
| if(!atomic && order == std::memory_order_relaxed) |
| { |
| if(hasStaticEqualOffsets()) |
| { |
| // Load one, replicate. |
| // Be careful of the case where the post-bounds-check mask |
| // is 0, in which case we must not load. |
| T out = T(0); |
| If(AnyTrue(mask)) |
| { |
| EL el = *rr::Pointer<EL>(base + staticOffsets[0], alignment); |
| out = T(el); |
| } |
| return out; |
| } |
| |
| bool zeroMaskedLanes = true; |
| switch(robustness) |
| { |
| case OutOfBoundsBehavior::Nullify: |
| case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero. |
| zeroMaskedLanes = true; |
| break; |
| case OutOfBoundsBehavior::UndefinedValue: |
| case OutOfBoundsBehavior::UndefinedBehavior: |
| zeroMaskedLanes = false; |
| break; |
| } |
| |
| if(hasStaticSequentialOffsets(sizeof(float))) |
| { |
| return rr::MaskedLoad(rr::Pointer<T>(base + staticOffsets[0]), mask, alignment, zeroMaskedLanes); |
| } |
| |
| return rr::Gather(rr::Pointer<EL>(base), offs, mask, alignment, zeroMaskedLanes); |
| } |
| else |
| { |
| T out; |
| auto anyLanesDisabled = AnyFalse(mask); |
| If(hasEqualOffsets() && !anyLanesDisabled) |
| { |
| // Load one, replicate. |
| auto offset = Extract(offs, 0); |
| out = T(rr::Load(rr::Pointer<EL>(&base[offset]), alignment, atomic, order)); |
| } |
| Else If(hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled) |
| { |
| // Load all elements in a single SIMD instruction. |
| auto offset = Extract(offs, 0); |
| out = rr::Load(rr::Pointer<T>(&base[offset]), alignment, atomic, order); |
| } |
| Else |
| { |
| // Divergent offsets or masked lanes. |
| out = T(0); |
| for(int i = 0; i < SIMD::Width; i++) |
| { |
| If(Extract(mask, i) != 0) |
| { |
| auto offset = Extract(offs, i); |
| auto el = rr::Load(rr::Pointer<EL>(&base[offset]), alignment, atomic, order); |
| out = Insert(out, el, i); |
| } |
| } |
| } |
| return out; |
| } |
| } |
| |
| template<typename T> |
| inline void SIMD::Pointer::Store(T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) |
| { |
| using EL = typename Element<T>::type; |
| constexpr size_t alignment = sizeof(float); |
| auto offs = offsets(); |
| |
| switch(robustness) |
| { |
| case OutOfBoundsBehavior::Nullify: |
| case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking. |
| case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access. |
| mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes. |
| break; |
| case OutOfBoundsBehavior::UndefinedBehavior: |
| // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. |
| break; |
| } |
| |
| if(!atomic && order == std::memory_order_relaxed) |
| { |
| if(hasStaticEqualOffsets()) |
| { |
| If(AnyTrue(mask)) |
| { |
| // All equal. One of these writes will win -- elect the winning lane. |
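				// v0111 & (mask.xxyz | mask.xxxy | mask.xxxx) ORs, for each lane,
				// all lower-numbered lanes of mask, so 'elect' keeps only the
				// lowest-numbered active lane.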
| auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
| auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx)); |
| auto maskedVal = As<SIMD::Int>(val) & elect; |
| auto scalarVal = Extract(maskedVal, 0) | |
| Extract(maskedVal, 1) | |
| Extract(maskedVal, 2) | |
| Extract(maskedVal, 3); |
| *rr::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal); |
| } |
| } |
| else if(hasStaticSequentialOffsets(sizeof(float))) |
| { |
| if(isStaticallyInBounds(sizeof(float), robustness)) |
| { |
					// Pointer has no elements OOB, and the store is not atomic.
					// Perform a read-modify-write.
| auto p = rr::Pointer<SIMD::Int>(base + staticOffsets[0], alignment); |
| auto prev = *p; |
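					// Blend per lane: lanes with mask set take val; the rest keep prev.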
| *p = (prev & ~mask) | (As<SIMD::Int>(val) & mask); |
| } |
| else |
| { |
| rr::MaskedStore(rr::Pointer<T>(base + staticOffsets[0]), val, mask, alignment); |
| } |
| } |
| else |
| { |
| rr::Scatter(rr::Pointer<EL>(base), val, offs, mask, alignment); |
| } |
| } |
| else |
| { |
| auto anyLanesDisabled = AnyFalse(mask); |
| If(hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled) |
| { |
| // Store all elements in a single SIMD instruction. |
| auto offset = Extract(offs, 0); |
| rr::Store(val, rr::Pointer<T>(&base[offset]), alignment, atomic, order); |
| } |
| Else |
| { |
| // Divergent offsets or masked lanes. |
| for(int i = 0; i < SIMD::Width; i++) |
| { |
| If(Extract(mask, i) != 0) |
| { |
| auto offset = Extract(offs, i); |
| rr::Store(Extract(val, i), rr::Pointer<EL>(&base[offset]), alignment, atomic, order); |
| } |
| } |
| } |
| } |
| } |
| |
| template<typename T> |
| inline void SIMD::Pointer::Store(RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) |
| { |
| Store(T(val), robustness, mask, atomic, order); |
| } |
| |
| template<typename T> |
| inline rr::RValue<T> AndAll(rr::RValue<T> const &mask) |
| { |
| T v1 = mask; // [x] [y] [z] [w] |
| T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw] |
| return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] |
| } |
| |
| template<typename T> |
| inline rr::RValue<T> OrAll(rr::RValue<T> const &mask) |
| { |
| T v1 = mask; // [x] [y] [z] [w] |
| T v2 = v1.xzxz | v1.ywyw; // [xy] [zw] [xy] [zw] |
| return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] |
| } |
| |
| } // namespace sw |
| |
| #ifdef ENABLE_RR_PRINT |
| namespace rr { |
| template<> |
| struct PrintValue::Ty<sw::Vector4f> |
| { |
| static std::string fmt(const sw::Vector4f &v) |
| { |
| return "[x: " + PrintValue::fmt(v.x) + |
| ", y: " + PrintValue::fmt(v.y) + |
| ", z: " + PrintValue::fmt(v.z) + |
| ", w: " + PrintValue::fmt(v.w) + "]"; |
| } |
| |
| static std::vector<rr::Value *> val(const sw::Vector4f &v) |
| { |
| return PrintValue::vals(v.x, v.y, v.z, v.w); |
| } |
| }; |
| template<> |
| struct PrintValue::Ty<sw::Vector4s> |
| { |
| static std::string fmt(const sw::Vector4s &v) |
| { |
| return "[x: " + PrintValue::fmt(v.x) + |
| ", y: " + PrintValue::fmt(v.y) + |
| ", z: " + PrintValue::fmt(v.z) + |
| ", w: " + PrintValue::fmt(v.w) + "]"; |
| } |
| |
| static std::vector<rr::Value *> val(const sw::Vector4s &v) |
| { |
| return PrintValue::vals(v.x, v.y, v.z, v.w); |
| } |
| }; |
| template<> |
| struct PrintValue::Ty<sw::SIMD::Pointer> |
| { |
| static std::string fmt(const sw::SIMD::Pointer &v) |
| { |
| return "{" + PrintValue::fmt(v.base) + " +" + PrintValue::fmt(v.offsets()) + "}"; |
| } |
| |
| static std::vector<rr::Value *> val(const sw::SIMD::Pointer &v) |
| { |
| return PrintValue::vals(v.base, v.offsets()); |
| } |
| }; |
| } // namespace rr |
| #endif // ENABLE_RR_PRINT |
| |
| #endif // sw_ShaderCore_hpp |