blob: 6a0b2853bfc4913bedb8e8555eed81e9d05660f8 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#ifndef sw_ShaderCore_hpp
16#define sw_ShaderCore_hpp
17
Nicolas Capens0bac2852016-05-07 06:09:58 -040018#include "Shader.hpp"
19#include "Reactor/Reactor.hpp"
Antonio Maiorano415d1812020-02-11 16:22:55 -050020#include "Reactor/Print.hpp"
Nicolas Capens708c24b2017-10-26 13:07:10 -040021#include "Common/Debug.hpp"
Nicolas Capens0bac2852016-05-07 06:09:58 -040022
23namespace sw
24{
Nicolas Capens48461502018-08-06 14:20:45 -040025 using namespace rr;
26
Nicolas Capens0bac2852016-05-07 06:09:58 -040027 class Vector4s
28 {
29 public:
30 Vector4s();
31 Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
32 Vector4s(const Vector4s &rhs);
33
34 Short4 &operator[](int i);
35 Vector4s &operator=(const Vector4s &rhs);
36
37 Short4 x;
38 Short4 y;
39 Short4 z;
40 Short4 w;
41 };
42
Nicolas Capens0bac2852016-05-07 06:09:58 -040043 class Vector4f
44 {
45 public:
46 Vector4f();
47 Vector4f(float x, float y, float z, float w);
48 Vector4f(const Vector4f &rhs);
49
50 Float4 &operator[](int i);
51 Vector4f &operator=(const Vector4f &rhs);
52
53 Float4 x;
54 Float4 y;
55 Float4 z;
56 Float4 w;
57 };
58
59 Float4 exponential2(RValue<Float4> x, bool pp = false);
60 Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
61 Float4 exponential(RValue<Float4> x, bool pp = false);
62 Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
63 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
64 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
65 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
66 Float4 modulo(RValue<Float4> x, RValue<Float4> y);
67 Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
68 Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
69 Float4 sine(RValue<Float4> x, bool pp = false);
70 Float4 cosine(RValue<Float4> x, bool pp = false);
71 Float4 tangent(RValue<Float4> x, bool pp = false);
72 Float4 arccos(RValue<Float4> x, bool pp = false);
73 Float4 arcsin(RValue<Float4> x, bool pp = false);
74 Float4 arctan(RValue<Float4> x, bool pp = false);
75 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
76 Float4 sineh(RValue<Float4> x, bool pp = false);
77 Float4 cosineh(RValue<Float4> x, bool pp = false);
78 Float4 tangenth(RValue<Float4> x, bool pp = false);
79 Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
80 Float4 arcsinh(RValue<Float4> x, bool pp = false);
81 Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
82
83 Float4 dot2(const Vector4f &v0, const Vector4f &v1);
84 Float4 dot3(const Vector4f &v0, const Vector4f &v1);
85 Float4 dot4(const Vector4f &v0, const Vector4f &v1);
86
87 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
Nicolas Capense4a88b92017-11-30 00:14:57 -050088 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
Nicolas Capens0bac2852016-05-07 06:09:58 -040089 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
90 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
91 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
92 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
93 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
Nicolas Capens0bac2852016-05-07 06:09:58 -040094 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
95
96 class Register
97 {
98 public:
99 Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w)
100 {
101 }
102
103 Reference<Float4> &operator[](int i)
104 {
105 switch(i)
106 {
107 default:
108 case 0: return x;
109 case 1: return y;
110 case 2: return z;
111 case 3: return w;
112 }
113 }
114
115 Register &operator=(const Register &rhs)
116 {
117 x = rhs.x;
118 y = rhs.y;
119 z = rhs.z;
120 w = rhs.w;
121
122 return *this;
123 }
124
125 Register &operator=(const Vector4f &rhs)
126 {
127 x = rhs.x;
128 y = rhs.y;
129 z = rhs.z;
130 w = rhs.w;
131
132 return *this;
133 }
134
135 operator Vector4f()
136 {
137 Vector4f v;
138
139 v.x = x;
140 v.y = y;
141 v.z = z;
142 v.w = w;
143
144 return v;
145 }
146
147 Reference<Float4> x;
148 Reference<Float4> y;
149 Reference<Float4> z;
150 Reference<Float4> w;
151 };
152
Nicolas Capens5bff4052018-05-28 13:18:59 -0400153 class RegisterFile
Nicolas Capens0bac2852016-05-07 06:09:58 -0400154 {
155 public:
Nicolas Capens5bff4052018-05-28 13:18:59 -0400156 RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
Nicolas Capens0bac2852016-05-07 06:09:58 -0400157 {
Nicolas Capens5bff4052018-05-28 13:18:59 -0400158 if(indirectAddressable)
Nicolas Capens0bac2852016-05-07 06:09:58 -0400159 {
Nicolas Capens5bff4052018-05-28 13:18:59 -0400160 x = new Array<Float4>(size);
161 y = new Array<Float4>(size);
162 z = new Array<Float4>(size);
163 w = new Array<Float4>(size);
Nicolas Capens0bac2852016-05-07 06:09:58 -0400164 }
165 else
166 {
Nicolas Capens5bff4052018-05-28 13:18:59 -0400167 x = new Array<Float4>[size];
168 y = new Array<Float4>[size];
169 z = new Array<Float4>[size];
170 w = new Array<Float4>[size];
Nicolas Capens0bac2852016-05-07 06:09:58 -0400171 }
172 }
173
Nicolas Capens5bff4052018-05-28 13:18:59 -0400174 ~RegisterFile()
Nicolas Capens0bac2852016-05-07 06:09:58 -0400175 {
Nicolas Capens5bff4052018-05-28 13:18:59 -0400176 if(indirectAddressable)
Nicolas Capens0bac2852016-05-07 06:09:58 -0400177 {
178 delete x;
179 delete y;
180 delete z;
181 delete w;
182 }
183 else
184 {
185 delete[] x;
186 delete[] y;
187 delete[] z;
188 delete[] w;
189 }
190 }
191
192 Register operator[](int i)
193 {
Ben Clayton90d251a2019-05-09 14:06:27 +0100194 ASSERT(i < size);
Nicolas Capens5bff4052018-05-28 13:18:59 -0400195 if(indirectAddressable)
Nicolas Capens0bac2852016-05-07 06:09:58 -0400196 {
197 return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
198 }
199 else
200 {
201 return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
202 }
203 }
204
205 Register operator[](RValue<Int> i)
206 {
Nicolas Capens5bff4052018-05-28 13:18:59 -0400207 ASSERT(indirectAddressable);
Nicolas Capens0bac2852016-05-07 06:09:58 -0400208
209 return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
210 }
211
Nicolas Capens5bff4052018-05-28 13:18:59 -0400212 const Vector4f operator[](RValue<Int4> i); // Gather operation (read only).
213
214 void scatter_x(Int4 i, RValue<Float4> r);
215 void scatter_y(Int4 i, RValue<Float4> r);
216 void scatter_z(Int4 i, RValue<Float4> r);
217 void scatter_w(Int4 i, RValue<Float4> r);
218
219 protected:
220 const int size;
221 const bool indirectAddressable;
Nicolas Capens0bac2852016-05-07 06:09:58 -0400222 Array<Float4> *x;
223 Array<Float4> *y;
224 Array<Float4> *z;
225 Array<Float4> *w;
226 };
227
Nicolas Capens5bff4052018-05-28 13:18:59 -0400228 template<int S, bool I = false>
229 class RegisterArray : public RegisterFile
230 {
231 public:
232 RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable)
233 {
234 }
235 };
236
Nicolas Capens0bac2852016-05-07 06:09:58 -0400237 class ShaderCore
238 {
239 typedef Shader::Control Control;
240
241 public:
242 void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
243 void neg(Vector4f &dst, const Vector4f &src);
244 void ineg(Vector4f &dst, const Vector4f &src);
245 void f2b(Vector4f &dst, const Vector4f &src);
246 void b2f(Vector4f &dst, const Vector4f &src);
247 void f2i(Vector4f &dst, const Vector4f &src);
248 void i2f(Vector4f &dst, const Vector4f &src);
249 void f2u(Vector4f &dst, const Vector4f &src);
250 void u2f(Vector4f &dst, const Vector4f &src);
251 void i2b(Vector4f &dst, const Vector4f &src);
252 void b2i(Vector4f &dst, const Vector4f &src);
253 void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
254 void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
255 void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
256 void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
257 void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
258 void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
259 void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
260 void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
261 void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
262 void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
263 void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
264 void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
265 void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
266 void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
267 void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
268 void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
269 void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
270 void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
271 void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
272 void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
273 void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
274 void len2(Float4 &dst, const Vector4f &src, bool pp = false);
275 void len3(Float4 &dst, const Vector4f &src, bool pp = false);
276 void len4(Float4 &dst, const Vector4f &src, bool pp = false);
277 void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
278 void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
279 void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
280 void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
281 void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
282 void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
283 void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
284 void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
285 void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
286 void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
287 void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
288 void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
289 void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
290 void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
291 void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
292 void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
293 void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
294 void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
295 void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
296 void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
297 void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
298 void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
299 void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
300 void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
301 void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
302 void log(Vector4f &dst, const Vector4f &src, bool pp = false);
303 void lit(Vector4f &dst, const Vector4f &src);
304 void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
305 void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
Alexis Hetu8ef6d102017-11-09 15:49:09 -0500306 void isinf(Vector4f &dst, const Vector4f &src);
307 void isnan(Vector4f &dst, const Vector4f &src);
Nicolas Capens0bac2852016-05-07 06:09:58 -0400308 void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
309 void packHalf2x16(Vector4f &dst, const Vector4f &src);
310 void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
311 void packSnorm2x16(Vector4f &dst, const Vector4f &src);
312 void packUnorm2x16(Vector4f &dst, const Vector4f &src);
313 void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
314 void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
315 void frc(Vector4f &dst, const Vector4f &src);
316 void trunc(Vector4f &dst, const Vector4f &src);
317 void floor(Vector4f &dst, const Vector4f &src);
318 void round(Vector4f &dst, const Vector4f &src);
319 void roundEven(Vector4f &dst, const Vector4f &src);
320 void ceil(Vector4f &dst, const Vector4f &src);
321 void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
322 void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
323 void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
324 void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
325 void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
326 void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
327 void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
328 void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
329 void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
330 void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
331 void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
332 void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
333 void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
334 void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
335 void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
336 void sgn(Vector4f &dst, const Vector4f &src);
337 void isgn(Vector4f &dst, const Vector4f &src);
338 void abs(Vector4f &dst, const Vector4f &src);
339 void iabs(Vector4f &dst, const Vector4f &src);
340 void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
341 void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
342 void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
343 void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
344 void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
345 void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
346 void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
347 void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
348 void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
349 void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
350 void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
351 void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
352 void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
353 void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
354 void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
355 void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
356 void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
Alexis Hetu53ad4af2017-12-06 14:49:07 -0500357 void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
358 void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
Nicolas Capens0bac2852016-05-07 06:09:58 -0400359 void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
360 void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
361 void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
362 void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
363 void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
364 void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
365 void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
366 void all(Float4 &dst, const Vector4f &src);
367 void any(Float4 &dst, const Vector4f &src);
Alexis Hetu24f454e2016-08-31 17:22:13 -0400368 void bitwise_not(Vector4f &dst, const Vector4f &src);
369 void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
370 void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
371 void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
Nicolas Capens0bac2852016-05-07 06:09:58 -0400372 void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
373 void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
374
375 private:
376 void sgn(Float4 &dst, const Float4 &src);
377 void isgn(Float4 &dst, const Float4 &src);
378 void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
379 void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
380 void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
381 void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
382 void halfToFloatBits(Float4& dst, const Float4& halfBits);
383 };
384}
385
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000386#ifdef ENABLE_RR_PRINT
387namespace rr {
388 template <> struct PrintValue::Ty<sw::Vector4f>
389 {
Ben Claytona6833282019-05-28 17:15:02 +0100390 static std::string fmt(const sw::Vector4f& v)
391 {
Ben Clayton71af5932019-12-11 10:05:24 +0000392 return "[x: " + PrintValue::fmt(v.x) +
393 ", y: " + PrintValue::fmt(v.y) +
394 ", z: " + PrintValue::fmt(v.z) +
395 ", w: " + PrintValue::fmt(v.w) + "]";
Ben Claytona6833282019-05-28 17:15:02 +0100396 }
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000397
398 static std::vector<rr::Value*> val(const sw::Vector4f& v)
399 {
400 return PrintValue::vals(v.x, v.y, v.z, v.w);
401 }
402 };
403 template <> struct PrintValue::Ty<sw::Vector4s>
404 {
Ben Claytona6833282019-05-28 17:15:02 +0100405 static std::string fmt(const sw::Vector4s& v)
406 {
Ben Clayton71af5932019-12-11 10:05:24 +0000407 return "[x: " + PrintValue::fmt(v.x) +
408 ", y: " + PrintValue::fmt(v.y) +
409 ", z: " + PrintValue::fmt(v.z) +
410 ", w: " + PrintValue::fmt(v.w) + "]";
Ben Claytona6833282019-05-28 17:15:02 +0100411 }
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000412
413 static std::vector<rr::Value*> val(const sw::Vector4s& v)
414 {
415 return PrintValue::vals(v.x, v.y, v.z, v.w);
416 }
417 };
418}
419#endif // ENABLE_RR_PRINT
420
Nicolas Capens0bac2852016-05-07 06:09:58 -0400421#endif // sw_ShaderCore_hpp