blob: bbd4c2ab8b99299ad9fa33fc2fbd20c4f99ebd77 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef sw_ShaderCore_hpp
16#define sw_ShaderCore_hpp
17
Ben Clayton23778452019-11-19 14:15:41 +000018#include "Reactor/Print.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040019#include "Reactor/Reactor.hpp"
Ben Clayton25e06e02020-02-07 11:19:08 +000020#include "System/Debug.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040021
Ben Clayton284953f2019-11-28 10:53:39 +000022#include <array>
Ben Claytonbc1c067be2019-12-17 20:37:37 +000023#include <atomic> // std::memory_order
24#include <utility> // std::pair
Ben Clayton284953f2019-11-28 10:53:39 +000025
Nicolas Capens157ba262019-12-10 17:49:14 -050026namespace sw {
27
28using namespace rr;
29
30class Vector4s
Nicolas Capens68a82382018-10-02 13:16:55 -040031{
Nicolas Capens157ba262019-12-10 17:49:14 -050032public:
33 Vector4s();
34 Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
35 Vector4s(const Vector4s &rhs);
Nicolas Capens48461502018-08-06 14:20:45 -040036
Nicolas Capens157ba262019-12-10 17:49:14 -050037 Short4 &operator[](int i);
38 Vector4s &operator=(const Vector4s &rhs);
Nicolas Capens68a82382018-10-02 13:16:55 -040039
Nicolas Capens157ba262019-12-10 17:49:14 -050040 Short4 x;
41 Short4 y;
42 Short4 z;
43 Short4 w;
44};
Nicolas Capens68a82382018-10-02 13:16:55 -040045
Nicolas Capens157ba262019-12-10 17:49:14 -050046class Vector4f
47{
48public:
49 Vector4f();
50 Vector4f(float x, float y, float z, float w);
51 Vector4f(const Vector4f &rhs);
Nicolas Capens68a82382018-10-02 13:16:55 -040052
Nicolas Capens157ba262019-12-10 17:49:14 -050053 Float4 &operator[](int i);
54 Vector4f &operator=(const Vector4f &rhs);
Nicolas Capens68a82382018-10-02 13:16:55 -040055
Nicolas Capens157ba262019-12-10 17:49:14 -050056 Float4 x;
57 Float4 y;
58 Float4 z;
59 Float4 w;
60};
Nicolas Capens68a82382018-10-02 13:16:55 -040061
// Selects how memory accesses that fall outside a pointer's limit are handled.
enum class OutOfBoundsBehavior
{
	// Loads become zero, stores are elided.
	Nullify,

	// As defined by the Vulkan spec (in short: access anywhere within
	// bounds, or zeroing).
	RobustBufferAccess,

	// Only for load operations. Not secure. No program termination.
	UndefinedValue,

	// Program may terminate.
	UndefinedBehavior,
};
Nicolas Capens68a82382018-10-02 13:16:55 -040069
Nicolas Capens157ba262019-12-10 17:49:14 -050070// SIMD contains types that represent multiple scalars packed into a single
71// vector data type. Types in the SIMD namespace provide a semantic hint
72// that the data should be treated as a per-execution-lane scalar instead of
73// a typical euclidean-style vector type.
74namespace SIMD {
Ben Clayton284953f2019-11-28 10:53:39 +000075
Nicolas Capens157ba262019-12-10 17:49:14 -050076// Width is the number of per-lane scalars packed into each SIMD vector.
77static constexpr int Width = 4;
Ben Clayton284953f2019-11-28 10:53:39 +000078
Nicolas Capens157ba262019-12-10 17:49:14 -050079using Float = rr::Float4;
80using Int = rr::Int4;
81using UInt = rr::UInt4;
Ben Clayton284953f2019-11-28 10:53:39 +000082
Nicolas Capens157ba262019-12-10 17:49:14 -050083struct Pointer
84{
85 Pointer(rr::Pointer<Byte> base, rr::Int limit);
86 Pointer(rr::Pointer<Byte> base, unsigned int limit);
87 Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset);
88 Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset);
Ben Clayton284953f2019-11-28 10:53:39 +000089
Ben Claytonbc1c067be2019-12-17 20:37:37 +000090 Pointer &operator+=(Int i);
91 Pointer &operator*=(Int i);
Ben Clayton284953f2019-11-28 10:53:39 +000092
Ben Claytonbc1c067be2019-12-17 20:37:37 +000093 Pointer operator+(SIMD::Int i);
94 Pointer operator*(SIMD::Int i);
Ben Clayton284953f2019-11-28 10:53:39 +000095
Ben Claytonbc1c067be2019-12-17 20:37:37 +000096 Pointer &operator+=(int i);
97 Pointer &operator*=(int i);
Ben Clayton284953f2019-11-28 10:53:39 +000098
Ben Claytonbc1c067be2019-12-17 20:37:37 +000099 Pointer operator+(int i);
100 Pointer operator*(int i);
Ben Clayton284953f2019-11-28 10:53:39 +0000101
Nicolas Capens157ba262019-12-10 17:49:14 -0500102 SIMD::Int offsets() const;
Ben Clayton284953f2019-11-28 10:53:39 +0000103
Nicolas Capens157ba262019-12-10 17:49:14 -0500104 SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const;
Ben Clayton284953f2019-11-28 10:53:39 +0000105
Nicolas Capens157ba262019-12-10 17:49:14 -0500106 bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const;
Ben Clayton284953f2019-11-28 10:53:39 +0000107
Nicolas Capens157ba262019-12-10 17:49:14 -0500108 Int limit() const;
Ben Clayton284953f2019-11-28 10:53:39 +0000109
Nicolas Capens157ba262019-12-10 17:49:14 -0500110 // Returns true if all offsets are sequential
111 // (N+0*step, N+1*step, N+2*step, N+3*step)
112 rr::Bool hasSequentialOffsets(unsigned int step) const;
Ben Clayton284953f2019-11-28 10:53:39 +0000113
Nicolas Capens157ba262019-12-10 17:49:14 -0500114 // Returns true if all offsets are are compile-time static and
115 // sequential (N+0*step, N+1*step, N+2*step, N+3*step)
116 bool hasStaticSequentialOffsets(unsigned int step) const;
Ben Clayton284953f2019-11-28 10:53:39 +0000117
Nicolas Capens157ba262019-12-10 17:49:14 -0500118 // Returns true if all offsets are equal (N, N, N, N)
119 rr::Bool hasEqualOffsets() const;
Ben Clayton284953f2019-11-28 10:53:39 +0000120
Nicolas Capens157ba262019-12-10 17:49:14 -0500121 // Returns true if all offsets are compile-time static and are equal
122 // (N, N, N, N)
123 bool hasStaticEqualOffsets() const;
Ben Clayton284953f2019-11-28 10:53:39 +0000124
125 template<typename T>
Nicolas Capens157ba262019-12-10 17:49:14 -0500126 inline T Load(OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
127
128 template<typename T>
129 inline void Store(T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
130
131 template<typename T>
132 inline void Store(RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
133
134 // Base address for the pointer, common across all lanes.
135 rr::Pointer<rr::Byte> base;
136
137 // Upper (non-inclusive) limit for offsets from base.
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000138 rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
Nicolas Capens157ba262019-12-10 17:49:14 -0500139 unsigned int staticLimit;
140
141 // Per lane offsets from base.
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000142 SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
Nicolas Capens157ba262019-12-10 17:49:14 -0500143 std::array<int32_t, SIMD::Width> staticOffsets;
144
145 bool hasDynamicLimit; // True if dynamicLimit is non-zero.
146 bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero.
147};
148
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000149template<typename T>
150struct Element
151{};
152template<>
153struct Element<Float>
154{
155 using type = rr::Float;
156};
157template<>
158struct Element<Int>
159{
160 using type = rr::Int;
161};
162template<>
163struct Element<UInt>
164{
165 using type = rr::UInt;
166};
Nicolas Capens157ba262019-12-10 17:49:14 -0500167
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000168} // namespace SIMD
Nicolas Capens157ba262019-12-10 17:49:14 -0500169
170Float4 exponential2(RValue<Float4> x, bool pp = false);
171Float4 logarithm2(RValue<Float4> x, bool pp = false);
172Float4 exponential(RValue<Float4> x, bool pp = false);
173Float4 logarithm(RValue<Float4> x, bool pp = false);
174Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
175Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
176Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
177Float4 modulo(RValue<Float4> x, RValue<Float4> y);
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000178Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
179Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
Nicolas Capens157ba262019-12-10 17:49:14 -0500180Float4 sine(RValue<Float4> x, bool pp = false);
181Float4 cosine(RValue<Float4> x, bool pp = false);
182Float4 tangent(RValue<Float4> x, bool pp = false);
183Float4 arccos(RValue<Float4> x, bool pp = false);
184Float4 arcsin(RValue<Float4> x, bool pp = false);
185Float4 arctan(RValue<Float4> x, bool pp = false);
186Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
187Float4 sineh(RValue<Float4> x, bool pp = false);
188Float4 cosineh(RValue<Float4> x, bool pp = false);
189Float4 tangenth(RValue<Float4> x, bool pp = false);
190Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
191Float4 arcsinh(RValue<Float4> x, bool pp = false);
192Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
193
194Float4 dot2(const Vector4f &v0, const Vector4f &v1);
195Float4 dot3(const Vector4f &v0, const Vector4f &v1);
196Float4 dot4(const Vector4f &v0, const Vector4f &v1);
197
198void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
199void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
200void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
201void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
202void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
203void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
204void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
205void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
206
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500207sw::SIMD::UInt halfToFloatBits(sw::SIMD::UInt halfBits);
208sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits);
Nicolas Capens0405ba02020-01-16 01:19:21 -0500209Float4 r11g11b10Unpack(UInt r11g11b10bits);
210UInt r11g11b10Pack(const Float4 &value);
211Vector4s a2b10g10r10Unpack(const Int4 &value);
212Vector4s a2r10g10b10Unpack(const Int4 &value);
Nicolas Capens157ba262019-12-10 17:49:14 -0500213
214rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints);
215
216rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints);
217
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000218template<typename T>
Nicolas Capens157ba262019-12-10 17:49:14 -0500219inline rr::RValue<T> AndAll(rr::RValue<T> const &mask);
220
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000221template<typename T>
Nicolas Capens157ba262019-12-10 17:49:14 -0500222inline rr::RValue<T> OrAll(rr::RValue<T> const &mask);
223
224rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val);
225
226// Returns the <whole, frac> of val.
227// Both whole and frac will have the same sign as val.
228std::pair<rr::RValue<sw::SIMD::Float>, rr::RValue<sw::SIMD::Float>>
229Modf(rr::RValue<sw::SIMD::Float> const &val);
230
231// Returns the number of 1s in bits, per lane.
232sw::SIMD::UInt CountBits(rr::RValue<sw::SIMD::UInt> const &bits);
233
234// Returns 1 << bits.
235// If the resulting bit overflows a 32 bit integer, 0 is returned.
236rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits);
237
238// Returns bitCount number of of 1's starting from the LSB.
239rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount);
240
241// Performs a fused-multiply add, returning a * b + c.
242rr::RValue<sw::SIMD::Float> FMA(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000243 rr::RValue<sw::SIMD::Float> const &a,
244 rr::RValue<sw::SIMD::Float> const &b,
245 rr::RValue<sw::SIMD::Float> const &c);
Nicolas Capens157ba262019-12-10 17:49:14 -0500246
247// Returns the exponent of the floating point number f.
248// Assumes IEEE 754
249rr::RValue<sw::SIMD::Int> Exponent(rr::RValue<sw::SIMD::Float> f);
250
251// Returns y if y < x; otherwise result is x.
252// If one operand is a NaN, the other operand is the result.
253// If both operands are NaN, the result is a NaN.
254rr::RValue<sw::SIMD::Float> NMin(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y);
255
256// Returns y if y > x; otherwise result is x.
257// If one operand is a NaN, the other operand is the result.
258// If both operands are NaN, the result is a NaN.
259rr::RValue<sw::SIMD::Float> NMax(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y);
260
261// Returns the determinant of a 2x2 matrix.
262rr::RValue<sw::SIMD::Float> Determinant(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000263 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
264 rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
Nicolas Capens157ba262019-12-10 17:49:14 -0500265
266// Returns the determinant of a 3x3 matrix.
267rr::RValue<sw::SIMD::Float> Determinant(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000268 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
269 rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
270 rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
Nicolas Capens157ba262019-12-10 17:49:14 -0500271
272// Returns the determinant of a 4x4 matrix.
273rr::RValue<sw::SIMD::Float> Determinant(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000274 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
275 rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
276 rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
277 rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
Nicolas Capens157ba262019-12-10 17:49:14 -0500278
279// Returns the inverse of a 2x2 matrix.
280std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000281 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
282 rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d);
Nicolas Capens157ba262019-12-10 17:49:14 -0500283
284// Returns the inverse of a 3x3 matrix.
285std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000286 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
287 rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
288 rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i);
Nicolas Capens157ba262019-12-10 17:49:14 -0500289
290// Returns the inverse of a 4x4 matrix.
291std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000292 rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
293 rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
294 rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
295 rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p);
Nicolas Capens157ba262019-12-10 17:49:14 -0500296
297////////////////////////////////////////////////////////////////////////////
298// Inline functions
299////////////////////////////////////////////////////////////////////////////
300
301template<typename T>
302inline T SIMD::Pointer::Load(OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
303{
304 using EL = typename Element<T>::type;
305
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500306 if(isStaticallyInBounds(sizeof(float), robustness))
Ben Clayton284953f2019-11-28 10:53:39 +0000307 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500308 // All elements are statically known to be in-bounds.
309 // We can avoid costly conditional on masks.
Ben Clayton284953f2019-11-28 10:53:39 +0000310
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500311 if(hasStaticSequentialOffsets(sizeof(float)))
Ben Clayton284953f2019-11-28 10:53:39 +0000312 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500313 // Offsets are sequential. Perform regular load.
314 return rr::Load(rr::Pointer<T>(base + staticOffsets[0]), alignment, atomic, order);
Ben Clayton284953f2019-11-28 10:53:39 +0000315 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500316 if(hasStaticEqualOffsets())
Ben Clayton284953f2019-11-28 10:53:39 +0000317 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500318 // Load one, replicate.
319 return T(*rr::Pointer<EL>(base + staticOffsets[0], alignment));
Ben Clayton284953f2019-11-28 10:53:39 +0000320 }
321 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500322 else
Ben Clayton284953f2019-11-28 10:53:39 +0000323 {
Ben Clayton284953f2019-11-28 10:53:39 +0000324 switch(robustness)
325 {
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000326 case OutOfBoundsBehavior::Nullify:
327 case OutOfBoundsBehavior::RobustBufferAccess:
328 case OutOfBoundsBehavior::UndefinedValue:
329 mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
330 break;
331 case OutOfBoundsBehavior::UndefinedBehavior:
332 // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
333 break;
Ben Clayton284953f2019-11-28 10:53:39 +0000334 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500335 }
Ben Clayton284953f2019-11-28 10:53:39 +0000336
Nicolas Capens157ba262019-12-10 17:49:14 -0500337 auto offs = offsets();
338
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500339 if(!atomic && order == std::memory_order_relaxed)
Nicolas Capens157ba262019-12-10 17:49:14 -0500340 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500341 if(hasStaticEqualOffsets())
Ben Clayton284953f2019-11-28 10:53:39 +0000342 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500343 // Load one, replicate.
344 // Be careful of the case where the post-bounds-check mask
345 // is 0, in which case we must not load.
346 T out = T(0);
347 If(AnyTrue(mask))
Ben Clayton284953f2019-11-28 10:53:39 +0000348 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500349 EL el = *rr::Pointer<EL>(base + staticOffsets[0], alignment);
350 out = T(el);
351 }
352 return out;
353 }
354
355 bool zeroMaskedLanes = true;
356 switch(robustness)
357 {
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000358 case OutOfBoundsBehavior::Nullify:
359 case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
360 zeroMaskedLanes = true;
361 break;
362 case OutOfBoundsBehavior::UndefinedValue:
363 case OutOfBoundsBehavior::UndefinedBehavior:
364 zeroMaskedLanes = false;
365 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500366 }
367
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500368 if(hasStaticSequentialOffsets(sizeof(float)))
Nicolas Capens157ba262019-12-10 17:49:14 -0500369 {
370 return rr::MaskedLoad(rr::Pointer<T>(base + staticOffsets[0]), mask, alignment, zeroMaskedLanes);
371 }
372
373 return rr::Gather(rr::Pointer<EL>(base), offs, mask, alignment, zeroMaskedLanes);
374 }
375 else
376 {
377 T out;
378 auto anyLanesDisabled = AnyFalse(mask);
379 If(hasEqualOffsets() && !anyLanesDisabled)
380 {
381 // Load one, replicate.
382 auto offset = Extract(offs, 0);
383 out = T(rr::Load(rr::Pointer<EL>(&base[offset]), alignment, atomic, order));
384 }
385 Else If(hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
386 {
387 // Load all elements in a single SIMD instruction.
388 auto offset = Extract(offs, 0);
389 out = rr::Load(rr::Pointer<T>(&base[offset]), alignment, atomic, order);
390 }
391 Else
392 {
393 // Divergent offsets or masked lanes.
394 out = T(0);
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500395 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500396 {
397 If(Extract(mask, i) != 0)
Ben Clayton284953f2019-11-28 10:53:39 +0000398 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500399 auto offset = Extract(offs, i);
400 auto el = rr::Load(rr::Pointer<EL>(&base[offset]), alignment, atomic, order);
401 out = Insert(out, el, i);
Ben Clayton284953f2019-11-28 10:53:39 +0000402 }
403 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500404 }
405 return out;
406 }
407}
408
409template<typename T>
410inline void SIMD::Pointer::Store(T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
411{
412 using EL = typename Element<T>::type;
413 constexpr size_t alignment = sizeof(float);
414 auto offs = offsets();
415
416 switch(robustness)
417 {
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000418 case OutOfBoundsBehavior::Nullify:
419 case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
420 case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access.
421 mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
422 break;
423 case OutOfBoundsBehavior::UndefinedBehavior:
424 // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
425 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500426 }
427
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500428 if(!atomic && order == std::memory_order_relaxed)
Nicolas Capens157ba262019-12-10 17:49:14 -0500429 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500430 if(hasStaticEqualOffsets())
Nicolas Capens157ba262019-12-10 17:49:14 -0500431 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500432 If(AnyTrue(mask))
Ben Clayton284953f2019-11-28 10:53:39 +0000433 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500434 // All equal. One of these writes will win -- elect the winning lane.
435 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
436 auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx));
437 auto maskedVal = As<SIMD::Int>(val) & elect;
438 auto scalarVal = Extract(maskedVal, 0) |
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000439 Extract(maskedVal, 1) |
440 Extract(maskedVal, 2) |
441 Extract(maskedVal, 3);
Nicolas Capens157ba262019-12-10 17:49:14 -0500442 *rr::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal);
443 }
444 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500445 else if(hasStaticSequentialOffsets(sizeof(float)))
Nicolas Capens157ba262019-12-10 17:49:14 -0500446 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500447 if(isStaticallyInBounds(sizeof(float), robustness))
Nicolas Capens157ba262019-12-10 17:49:14 -0500448 {
449 // Pointer has no elements OOB, and the store is not atomic.
450 // Perform a RMW.
451 auto p = rr::Pointer<SIMD::Int>(base + staticOffsets[0], alignment);
452 auto prev = *p;
453 *p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
Ben Clayton284953f2019-11-28 10:53:39 +0000454 }
455 else
456 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500457 rr::MaskedStore(rr::Pointer<T>(base + staticOffsets[0]), val, mask, alignment);
Ben Clayton284953f2019-11-28 10:53:39 +0000458 }
459 }
460 else
461 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500462 rr::Scatter(rr::Pointer<EL>(base), val, offs, mask, alignment);
463 }
464 }
465 else
466 {
467 auto anyLanesDisabled = AnyFalse(mask);
468 If(hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
469 {
470 // Store all elements in a single SIMD instruction.
471 auto offset = Extract(offs, 0);
472 rr::Store(val, rr::Pointer<T>(&base[offset]), alignment, atomic, order);
473 }
474 Else
475 {
476 // Divergent offsets or masked lanes.
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500477 for(int i = 0; i < SIMD::Width; i++)
Ben Clayton284953f2019-11-28 10:53:39 +0000478 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500479 If(Extract(mask, i) != 0)
Ben Clayton284953f2019-11-28 10:53:39 +0000480 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500481 auto offset = Extract(offs, i);
482 rr::Store(Extract(val, i), rr::Pointer<EL>(&base[offset]), alignment, atomic, order);
Ben Clayton284953f2019-11-28 10:53:39 +0000483 }
484 }
485 }
486 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500487}
Ben Clayton284953f2019-11-28 10:53:39 +0000488
Nicolas Capens157ba262019-12-10 17:49:14 -0500489template<typename T>
490inline void SIMD::Pointer::Store(RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
491{
492 Store(T(val), robustness, mask, atomic, order);
493}
Ben Clayton284953f2019-11-28 10:53:39 +0000494
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000495template<typename T>
Nicolas Capens157ba262019-12-10 17:49:14 -0500496inline rr::RValue<T> AndAll(rr::RValue<T> const &mask)
497{
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000498 T v1 = mask; // [x] [y] [z] [w]
499 T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw]
500 return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
Nicolas Capens157ba262019-12-10 17:49:14 -0500501}
Ben Clayton284953f2019-11-28 10:53:39 +0000502
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000503template<typename T>
Nicolas Capens157ba262019-12-10 17:49:14 -0500504inline rr::RValue<T> OrAll(rr::RValue<T> const &mask)
505{
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000506 T v1 = mask; // [x] [y] [z] [w]
507 T v2 = v1.xzxz | v1.ywyw; // [xy] [zw] [xy] [zw]
508 return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
Nicolas Capens157ba262019-12-10 17:49:14 -0500509}
Ben Clayton284953f2019-11-28 10:53:39 +0000510
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000511} // namespace sw
Nicolas Capens68a82382018-10-02 13:16:55 -0400512
#ifdef ENABLE_RR_PRINT
namespace rr {

// Print support for sw::Vector4f: formats the four components as
// "[x: ..., y: ..., z: ..., w: ...]" and supplies their values in x, y, z, w
// order.
template<>
struct PrintValue::Ty<sw::Vector4f>
{
	static std::string fmt(const sw::Vector4f &v)
	{
		std::string text = "[x: ";
		text += PrintValue::fmt(v.x);
		text += ", y: ";
		text += PrintValue::fmt(v.y);
		text += ", z: ";
		text += PrintValue::fmt(v.z);
		text += ", w: ";
		text += PrintValue::fmt(v.w);
		text += "]";
		return text;
	}

	static std::vector<rr::Value *> val(const sw::Vector4f &v)
	{
		return PrintValue::vals(v.x, v.y, v.z, v.w);
	}
};

// Print support for sw::Vector4s: formats the four components as
// "[x: ..., y: ..., z: ..., w: ...]" and supplies their values in x, y, z, w
// order.
template<>
struct PrintValue::Ty<sw::Vector4s>
{
	static std::string fmt(const sw::Vector4s &v)
	{
		std::string text = "[x: ";
		text += PrintValue::fmt(v.x);
		text += ", y: ";
		text += PrintValue::fmt(v.y);
		text += ", z: ";
		text += PrintValue::fmt(v.z);
		text += ", w: ";
		text += PrintValue::fmt(v.w);
		text += "]";
		return text;
	}

	static std::vector<rr::Value *> val(const sw::Vector4s &v)
	{
		return PrintValue::vals(v.x, v.y, v.z, v.w);
	}
};

}  // namespace rr
#endif  // ENABLE_RR_PRINT
Ben Claytona6833282019-05-28 17:15:02 +0100550
Ben Claytonbc1c067be2019-12-17 20:37:37 +0000551#endif // sw_ShaderCore_hpp