Blame - src/Shader/ShaderCore.cpp - SwiftShader

blob: 90a28bfd43b3533f5365870bf1728d0722181c36 [file] [log] [blame]

Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	2	//
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	6	//
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	7	// http://www.apache.org/licenses/LICENSE-2.0
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	8	//
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	14
				15	#include "ShaderCore.hpp"
				16
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	17	#include "Renderer/Renderer.hpp"
				18	#include "Common/Debug.hpp"
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	19
Alexis Hetu	d5c31da	2015-08-28 14:39:13 -0400	[diff] [blame]	20	#include <limits.h>
				21
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	22	namespace sw
				23	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	24	extern TranscendentalPrecision logPrecision;
				25	extern TranscendentalPrecision expPrecision;
				26	extern TranscendentalPrecision rcpPrecision;
				27	extern TranscendentalPrecision rsqPrecision;
				28
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	29	Vector4s::Vector4s()
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	30	{
				31	}
				32
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	33	Vector4s::Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	34	{
				35	this->x = Short4(x);
				36	this->y = Short4(y);
				37	this->z = Short4(z);
				38	this->w = Short4(w);
				39	}
				40
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	41	Vector4s::Vector4s(const Vector4s &rhs)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	42	{
				43	x = rhs.x;
				44	y = rhs.y;
				45	z = rhs.z;
				46	w = rhs.w;
				47	}
				48
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	49	Vector4s &Vector4s::operator=(const Vector4s &rhs)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	50	{
				51	x = rhs.x;
				52	y = rhs.y;
				53	z = rhs.z;
				54	w = rhs.w;
				55
				56	return *this;
				57	}
				58
Alexis Hetu	9651718	2015-04-15 10:30:23 -0400	[diff] [blame]	59	Short4 &Vector4s::operator[](int i)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	60	{
				61	switch(i)
				62	{
				63	case 0: return x;
				64	case 1: return y;
				65	case 2: return z;
				66	case 3: return w;
				67	}
				68
				69	return x;
				70	}
				71
				72	Vector4f::Vector4f()
				73	{
				74	}
				75
				76	Vector4f::Vector4f(float x, float y, float z, float w)
				77	{
				78	this->x = Float4(x);
				79	this->y = Float4(y);
				80	this->z = Float4(z);
				81	this->w = Float4(w);
				82	}
				83
				84	Vector4f::Vector4f(const Vector4f &rhs)
				85	{
				86	x = rhs.x;
				87	y = rhs.y;
				88	z = rhs.z;
				89	w = rhs.w;
				90	}
				91
				92	Vector4f &Vector4f::operator=(const Vector4f &rhs)
				93	{
				94	x = rhs.x;
				95	y = rhs.y;
				96	z = rhs.z;
				97	w = rhs.w;
				98
				99	return *this;
				100	}
				101
				102	Float4 &Vector4f::operator[](int i)
				103	{
				104	switch(i)
				105	{
				106	case 0: return x;
				107	case 1: return y;
				108	case 2: return z;
				109	case 3: return w;
				110	}
				111
				112	return x;
				113	}
				114
				115	Float4 exponential2(RValue<Float4> x, bool pp)
				116	{
Nicolas Capens	41bcdc7	2018-01-11 21:19:34 -0500	[diff] [blame]	117	// This implementation is based on 2^(i + f) = 2^i * 2^f,
				118	// where i is the integer part of x and f is the fraction.
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	119
Nicolas Capens	41bcdc7	2018-01-11 21:19:34 -0500	[diff] [blame]	120	// For 2^i we can put the integer part directly in the exponent of
				121	// the IEEE-754 floating-point number. Clamp to prevent overflow
				122	// past the representation of infinity.
				123	Float4 x0 = x;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	124	x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f
				125	x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	126
Nicolas Capens	41bcdc7	2018-01-11 21:19:34 -0500	[diff] [blame]	127	Int4 i = RoundInt(x0 - Float4(0.5f));
				128	Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent.
				129
				130	// For the fractional part use a polynomial
				131	// which approximates 2^f in the 0 to 1 range.
				132	Float4 f = x0 - Float4(i);
				133	Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f
				134	ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f
				135	ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f
				136	ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f
				137	ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f
				138	ff = ff * f + Float4(1.0f);
				139
				140	return ii * ff;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	141	}
				142
				143	Float4 logarithm2(RValue<Float4> x, bool absolute, bool pp)
				144	{
				145	Float4 x0;
				146	Float4 x1;
				147	Float4 x2;
				148	Float4 x3;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	149
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	150	x0 = x;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	151
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	152	x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
				153	x1 = As<Float4>(As<UInt4>(x1) >> 8);
				154	x1 = As<Float4>(As<Int4>(x1) \| As<Int4>(Float4(1.0f)));
				155	x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
				156	x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) \| As<Int4>(Float4(1.0f)));
				157
				158	x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
				159	x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f);
				160	x2 /= x3;
				161
				162	x1 += (x0 - Float4(1.0f)) * x2;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	163
Alexis Hetu	0b7003b	2017-11-13 16:21:11 -0500	[diff] [blame]	164	Int4 pos_inf_x = CmpEQ(As<Int4>(x), Int4(0x7F800000));
				165	return As<Float4>((pos_inf_x & As<Int4>(x)) \| (~pos_inf_x & As<Int4>(x1)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	166	}
				167
				168	Float4 exponential(RValue<Float4> x, bool pp)
				169	{
				170	// FIXME: Propagate the constant
Alexis Hetu	0b7003b	2017-11-13 16:21:11 -0500	[diff] [blame]	171	return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	172	}
				173
				174	Float4 logarithm(RValue<Float4> x, bool absolute, bool pp)
				175	{
				176	// FIXME: Propagate the constant
				177	return Float4(6.93147181e-1f) * logarithm2(x, absolute, pp); // ln(2)
				178	}
				179
				180	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp)
				181	{
				182	Float4 log = logarithm2(x, true, pp);
				183	log *= y;
				184	return exponential2(log, pp);
				185	}
				186
Nicolas Capens	05b3d66	2016-02-25 23:58:33 -0500	[diff] [blame]	187	Float4 reciprocal(RValue<Float4> x, bool pp, bool finite, bool exactAtPow2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	188	{
				189	Float4 rcp;
				190
				191	if(!pp && rcpPrecision >= WHQL)
				192	{
				193	rcp = Float4(1.0f) / x;
				194	}
				195	else
				196	{
Nicolas Capens	05b3d66	2016-02-25 23:58:33 -0500	[diff] [blame]	197	rcp = Rcp_pp(x, exactAtPow2);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	198
				199	if(!pp)
				200	{
				201	rcp = (rcp + rcp) - (x * rcp * rcp);
				202	}
				203	}
				204
				205	if(finite)
				206	{
				207	int big = 0x7F7FFFFF;
				208	rcp = Min(rcp, Float4((float&)big));
				209	}
				210
				211	return rcp;
				212	}
				213
				214	Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp)
				215	{
				216	Float4 abs = x;
				217
				218	if(absolute)
				219	{
				220	abs = Abs(abs);
				221	}
				222
				223	Float4 rsq;
				224
Alexis Hetu	a0ef97a	2017-11-13 17:31:20 -0500	[diff] [blame]	225	if(!pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	226	{
				227	rsq = Float4(1.0f) / Sqrt(abs);
				228	}
				229	else
				230	{
				231	rsq = RcpSqrt_pp(abs);
				232
				233	if(!pp)
				234	{
				235	rsq = rsq * (Float4(3.0f) - rsq * rsq * abs) * Float4(0.5f);
				236	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	237
Alexis Hetu	a0ef97a	2017-11-13 17:31:20 -0500	[diff] [blame]	238	rsq = As<Float4>(CmpNEQ(As<Int4>(abs), Int4(0x7F800000)) & As<Int4>(rsq));
				239	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	240
				241	return rsq;
				242	}
				243
				244	Float4 modulo(RValue<Float4> x, RValue<Float4> y)
				245	{
				246	return x - y * Floor(x / y);
				247	}
				248
				249	Float4 sine_pi(RValue<Float4> x, bool pp)
				250	{
				251	const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2
				252	const Float4 B = Float4(1.27323954e+0f); // 4/pi
				253	const Float4 C = Float4(7.75160950e-1f);
				254	const Float4 D = Float4(2.24839049e-1f);
				255
				256	// Parabola approximating sine
				257	Float4 sin = x * (Abs(x) * A + B);
				258
				259	// Improve precision from 0.06 to 0.001
				260	if(true)
				261	{
				262	sin = sin * (Abs(sin) * D + C);
				263	}
				264
				265	return sin;
				266	}
				267
				268	Float4 cosine_pi(RValue<Float4> x, bool pp)
				269	{
				270	// cos(x) = sin(x + pi/2)
				271	Float4 y = x + Float4(1.57079632e+0f);
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	272
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	273	// Wrap around
				274	y -= As<Float4>(CmpNLT(y, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f)));
				275
				276	return sine_pi(y, pp);
				277	}
				278
Antonio Maiorano	3942f5c	2020-04-03 16:46:32 -0400	[diff] [blame]	279	// Assumes x is a finite floating point value
				280	static RValue<Float4> clamp(const Float4 &x, const Float4 &min, const Float4 &max)
				281	{
				282	return Min(Max(x, min), max);
				283	}
				284
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	285	Float4 sine(RValue<Float4> x, bool pp)
				286	{
				287	// Reduce to [-0.5, 0.5] range
				288	Float4 y = x * Float4(1.59154943e-1f); // 1/2pi
				289	y = y - Round(y);
				290
Alexis Hetu	929c6b0	2017-11-07 16:04:25 -0500	[diff] [blame]	291	if(!pp)
				292	{
				293	// From the paper: "A Fast, Vectorizable Algorithm for Producing Single-Precision Sine-Cosine Pairs"
				294	// This implementation passes OpenGL ES 3.0 precision requirements, at the cost of more operations:
				295	// !pp : 17 mul, 7 add, 1 sub, 1 reciprocal
				296	// pp : 4 mul, 2 add, 2 abs
				297
				298	Float4 y2 = y * y;
				299	Float4 c1 = y2 * (y2 * (y2 * Float4(-0.0204391631f) + Float4(0.2536086171f)) + Float4(-1.2336977925f)) + Float4(1.0f);
				300	Float4 s1 = y * (y2 * (y2 * (y2 * Float4(-0.0046075748f) + Float4(0.0796819754f)) + Float4(-0.645963615f)) + Float4(1.5707963235f));
				301	Float4 c2 = (c1 * c1) - (s1 * s1);
				302	Float4 s2 = Float4(2.0f) * s1 * c1;
				303	return Float4(2.0f) * s2 * c2 * reciprocal(s2 * s2 + c2 * c2, pp, true);
				304	}
				305
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	306	const Float4 A = Float4(-16.0f);
				307	const Float4 B = Float4(8.0f);
				308	const Float4 C = Float4(7.75160950e-1f);
				309	const Float4 D = Float4(2.24839049e-1f);
				310
				311	// Parabola approximating sine
				312	Float4 sin = y * (Abs(y) * A + B);
				313
				314	// Improve precision from 0.06 to 0.001
				315	if(true)
				316	{
				317	sin = sin * (Abs(sin) * D + C);
				318	}
				319
Antonio Maiorano	3942f5c	2020-04-03 16:46:32 -0400	[diff] [blame]	320	// TODO(b/151461290): Fix precision loss instead of clamping.
				321	sin = clamp(sin, Float4(-1.0f), Float4(1.0f));
				322
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	323	return sin;
				324	}
				325
				326	Float4 cosine(RValue<Float4> x, bool pp)
				327	{
				328	// cos(x) = sin(x + pi/2)
				329	Float4 y = x + Float4(1.57079632e+0f);
Antonio Maiorano	3942f5c	2020-04-03 16:46:32 -0400	[diff] [blame]	330	auto cos = sine(y, pp);
				331
				332	// TODO(b/151461290): Fix precision loss instead of clamping.
				333	cos = clamp(cos, Float4(-1.0f), Float4(1.0f));
				334
				335	return cos;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	336	}
				337
				338	Float4 tangent(RValue<Float4> x, bool pp)
				339	{
				340	return sine(x, pp) / cosine(x, pp);
				341	}
				342
				343	Float4 arccos(RValue<Float4> x, bool pp)
				344	{
				345	// pi/2 - arcsin(x)
				346	return Float4(1.57079632e+0f) - arcsin(x);
				347	}
				348
				349	Float4 arcsin(RValue<Float4> x, bool pp)
				350	{
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	351	if(false) // Simpler implementation fails even lowp precision tests
				352	{
				353	// x(pi/2-sqrt(1-xx)*pi/5)
				354	return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - xx) Float4(6.28318531e-1f));
				355	}
				356	else
				357	{
				358	// From 4.4.45, page 81 of the Handbook of Mathematical Functions, by Milton Abramowitz and Irene Stegun
				359	const Float4 half_pi(1.57079632f);
				360	const Float4 a0(1.5707288f);
				361	const Float4 a1(-0.2121144f);
				362	const Float4 a2(0.0742610f);
				363	const Float4 a3(-0.0187293f);
				364	Float4 absx = Abs(x);
				365	return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
				366	(As<Int4>(x) & Int4(0x80000000)));
				367	}
				368	}
				369
				370	// Approximation of atan in [0..1]
				371	Float4 arctan_01(Float4 x, bool pp)
				372	{
				373	if(pp)
				374	{
				375	return x * (Float4(-0.27f) * x + Float4(1.05539816f));
				376	}
				377	else
				378	{
				379	// From 4.4.49, page 81 of the Handbook of Mathematical Functions, by Milton Abramowitz and Irene Stegun
				380	const Float4 a2(-0.3333314528f);
				381	const Float4 a4(0.1999355085f);
				382	const Float4 a6(-0.1420889944f);
				383	const Float4 a8(0.1065626393f);
				384	const Float4 a10(-0.0752896400f);
				385	const Float4 a12(0.0429096138f);
				386	const Float4 a14(-0.0161657367f);
				387	const Float4 a16(0.0028662257f);
				388	Float4 x2 = x * x;
				389	return (x + x * (x2 * (a2 + x2 * (a4 + x2 * (a6 + x2 * (a8 + x2 * (a10 + x2 * (a12 + x2 * (a14 + x2 * a16)))))))));
				390	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	391	}
				392
				393	Float4 arctan(RValue<Float4> x, bool pp)
				394	{
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	395	Float4 absx = Abs(x);
				396	Int4 O = CmpNLT(absx, Float4(1.0f));
				397	Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) \| (~O & As<Int4>(absx))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	398
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	399	const Float4 half_pi(1.57079632f);
				400	Float4 theta = arctan_01(y, pp);
				401	return As<Float4>(((O & As<Int4>(half_pi - theta)) \| (~O & As<Int4>(theta))) ^ // FIXME: Vector select
				402	(As<Int4>(x) & Int4(0x80000000)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	403	}
				404
				405	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp)
				406	{
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	407	const Float4 pi(3.14159265f); // pi
				408	const Float4 minus_pi(-3.14159265f); // -pi
				409	const Float4 half_pi(1.57079632f); // pi/2
				410	const Float4 quarter_pi(7.85398163e-1f); // pi/4
				411
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	412	// Rotate to upper semicircle when in lower semicircle
				413	Int4 S = CmpLT(y, Float4(0.0f));
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	414	Float4 theta = As<Float4>(S & As<Int4>(minus_pi));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	415	Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x));
				416	Float4 y0 = Abs(y);
				417
				418	// Rotate to right quadrant when in left quadrant
Alexis Hetu	596f653	2018-05-15 14:07:19 -0400	[diff] [blame]	419	Int4 Q = CmpLT(x0, Float4(0.0f));
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	420	theta += As<Float4>(Q & As<Int4>(half_pi));
				421	Float4 x1 = As<Float4>((Q & As<Int4>(y0)) \| (~Q & As<Int4>(x0))); // FIXME: Vector select
				422	Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) \| (~Q & As<Int4>(y0))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	423
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	424	// Mirror to first octant when in second octant
Alexis Hetu	596f653	2018-05-15 14:07:19 -0400	[diff] [blame]	425	Int4 O = CmpNLT(y1, x1);
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	426	Float4 x2 = As<Float4>((O & As<Int4>(y1)) \| (~O & As<Int4>(x1))); // FIXME: Vector select
				427	Float4 y2 = As<Float4>((O & As<Int4>(x1)) \| (~O & As<Int4>(y1))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	428
				429	// Approximation of atan in [0..1]
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	430	Int4 zero_x = CmpEQ(x2, Float4(0.0f));
				431	Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
				432	Float4 atan2_theta = arctan_01(y2 / x2, pp);
Alexis Hetu	596f653	2018-05-15 14:07:19 -0400	[diff] [blame]	433	theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) \| (~O & (As<Int4>(atan2_theta))))) \| // FIXME: Vector select
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	434	(inf_y & As<Int4>(quarter_pi)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	435
Alexis Hetu	1728dde	2017-11-08 13:43:16 -0500	[diff] [blame]	436	// Recover loss of precision for tiny theta angles
				437	Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
				438	return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) \| (~precision_loss & As<Int4>(theta))); // FIXME: Vector select
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	439	}
				440
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	441	Float4 sineh(RValue<Float4> x, bool pp)
				442	{
				443	return (exponential(x, pp) - exponential(-x, pp)) * Float4(0.5f);
				444	}
				445
				446	Float4 cosineh(RValue<Float4> x, bool pp)
				447	{
				448	return (exponential(x, pp) + exponential(-x, pp)) * Float4(0.5f);
				449	}
				450
				451	Float4 tangenth(RValue<Float4> x, bool pp)
				452	{
				453	Float4 e_x = exponential(x, pp);
				454	Float4 e_minus_x = exponential(-x, pp);
				455	return (e_x - e_minus_x) / (e_x + e_minus_x);
				456	}
				457
				458	Float4 arccosh(RValue<Float4> x, bool pp)
				459	{
				460	return logarithm(x + Sqrt(x + Float4(1.0f)) * Sqrt(x - Float4(1.0f)), pp);
				461	}
				462
				463	Float4 arcsinh(RValue<Float4> x, bool pp)
				464	{
				465	return logarithm(x + Sqrt(x * x + Float4(1.0f)), pp);
				466	}
				467
				468	Float4 arctanh(RValue<Float4> x, bool pp)
				469	{
				470	return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f);
				471	}
				472
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	473	Float4 dot2(const Vector4f &v0, const Vector4f &v1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	474	{
				475	return v0.x * v1.x + v0.y * v1.y;
				476	}
				477
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	478	Float4 dot3(const Vector4f &v0, const Vector4f &v1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	479	{
				480	return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
				481	}
				482
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	483	Float4 dot4(const Vector4f &v0, const Vector4f &v1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	484	{
				485	return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
				486	}
				487
				488	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
				489	{
				490	Int2 tmp0 = UnpackHigh(row0, row1);
				491	Int2 tmp1 = UnpackHigh(row2, row3);
				492	Int2 tmp2 = UnpackLow(row0, row1);
				493	Int2 tmp3 = UnpackLow(row2, row3);
				494
Nicolas Capens	45f187a	2016-12-02 15:30:56 -0500	[diff] [blame]	495	row0 = UnpackLow(tmp2, tmp3);
				496	row1 = UnpackHigh(tmp2, tmp3);
				497	row2 = UnpackLow(tmp0, tmp1);
				498	row3 = UnpackHigh(tmp0, tmp1);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	499	}
				500
Nicolas Capens	e4a88b9	2017-11-30 00:14:57 -0500	[diff] [blame]	501	void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
				502	{
				503	Int2 tmp0 = UnpackHigh(row0, row1);
				504	Int2 tmp1 = UnpackHigh(row2, row3);
				505	Int2 tmp2 = UnpackLow(row0, row1);
				506	Int2 tmp3 = UnpackLow(row2, row3);
				507
				508	row0 = UnpackLow(tmp2, tmp3);
				509	row1 = UnpackHigh(tmp2, tmp3);
				510	row2 = UnpackLow(tmp0, tmp1);
				511	}
				512
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	513	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				514	{
				515	Float4 tmp0 = UnpackLow(row0, row1);
				516	Float4 tmp1 = UnpackLow(row2, row3);
				517	Float4 tmp2 = UnpackHigh(row0, row1);
				518	Float4 tmp3 = UnpackHigh(row2, row3);
				519
				520	row0 = Float4(tmp0.xy, tmp1.xy);
				521	row1 = Float4(tmp0.zw, tmp1.zw);
				522	row2 = Float4(tmp2.xy, tmp3.xy);
				523	row3 = Float4(tmp2.zw, tmp3.zw);
				524	}
				525
				526	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				527	{
				528	Float4 tmp0 = UnpackLow(row0, row1);
				529	Float4 tmp1 = UnpackLow(row2, row3);
				530	Float4 tmp2 = UnpackHigh(row0, row1);
				531	Float4 tmp3 = UnpackHigh(row2, row3);
				532
				533	row0 = Float4(tmp0.xy, tmp1.xy);
				534	row1 = Float4(tmp0.zw, tmp1.zw);
				535	row2 = Float4(tmp2.xy, tmp3.xy);
				536	}
				537
				538	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				539	{
				540	Float4 tmp0 = UnpackLow(row0, row1);
				541	Float4 tmp1 = UnpackLow(row2, row3);
				542
				543	row0 = Float4(tmp0.xy, tmp1.xy);
				544	row1 = Float4(tmp0.zw, tmp1.zw);
				545	}
				546
				547	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				548	{
				549	Float4 tmp0 = UnpackLow(row0, row1);
				550	Float4 tmp1 = UnpackLow(row2, row3);
				551
				552	row0 = Float4(tmp0.xy, tmp1.xy);
				553	}
				554
				555	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
				556	{
Nicolas Capens	54ac5e8	2016-12-09 14:07:50 -0500	[diff] [blame]	557	Float4 tmp01 = UnpackLow(row0, row1);
				558	Float4 tmp23 = UnpackHigh(row0, row1);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	559
Nicolas Capens	54ac5e8	2016-12-09 14:07:50 -0500	[diff] [blame]	560	row0 = tmp01;
				561	row1 = Float4(tmp01.zw, row1.zw);
				562	row2 = tmp23;
				563	row3 = Float4(tmp23.zw, row3.zw);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	564	}
				565
				566	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
				567	{
				568	switch(N)
				569	{
				570	case 1: transpose4x1(row0, row1, row2, row3); break;
				571	case 2: transpose4x2(row0, row1, row2, row3); break;
				572	case 3: transpose4x3(row0, row1, row2, row3); break;
				573	case 4: transpose4x4(row0, row1, row2, row3); break;
				574	}
				575	}
				576
Nicolas Capens	5bff405	2018-05-28 13:18:59 -0400	[diff] [blame]	577	const Vector4f RegisterFile::operator[](RValue<Int4> index)
				578	{
				579	ASSERT(indirectAddressable);
				580
				581	Int index0 = Extract(index, 0);
				582	Int index1 = Extract(index, 1);
				583	Int index2 = Extract(index, 2);
				584	Int index3 = Extract(index, 3);
				585
				586	Vector4f r;
				587
				588	r.x.x = Extract(x[0][index0], 0);
				589	r.x.y = Extract(x[0][index1], 1);
				590	r.x.z = Extract(x[0][index2], 2);
				591	r.x.w = Extract(x[0][index3], 3);
				592
				593	r.y.x = Extract(y[0][index0], 0);
				594	r.y.y = Extract(y[0][index1], 1);
				595	r.y.z = Extract(y[0][index2], 2);
				596	r.y.w = Extract(y[0][index3], 3);
				597
				598	r.z.x = Extract(z[0][index0], 0);
				599	r.z.y = Extract(z[0][index1], 1);
				600	r.z.z = Extract(z[0][index2], 2);
				601	r.z.w = Extract(z[0][index3], 3);
				602
				603	r.w.x = Extract(w[0][index0], 0);
				604	r.w.y = Extract(w[0][index1], 1);
				605	r.w.z = Extract(w[0][index2], 2);
				606	r.w.w = Extract(w[0][index3], 3);
				607
				608	return r;
				609	}
				610
				611	void RegisterFile::scatter_x(Int4 index, RValue<Float4> r)
				612	{
				613	ASSERT(indirectAddressable);
				614
				615	Int index0 = Extract(index, 0);
				616	Int index1 = Extract(index, 1);
				617	Int index2 = Extract(index, 2);
				618	Int index3 = Extract(index, 3);
				619
				620	x[0][index0] = Insert(x[0][index0], Extract(r, 0), 0);
				621	x[0][index1] = Insert(x[0][index1], Extract(r, 1), 1);
				622	x[0][index2] = Insert(x[0][index2], Extract(r, 2), 2);
				623	x[0][index3] = Insert(x[0][index3], Extract(r, 3), 3);
				624	}
				625
				626	void RegisterFile::scatter_y(Int4 index, RValue<Float4> r)
				627	{
				628	ASSERT(indirectAddressable);
				629
				630	Int index0 = Extract(index, 0);
				631	Int index1 = Extract(index, 1);
				632	Int index2 = Extract(index, 2);
				633	Int index3 = Extract(index, 3);
				634
				635	y[0][index0] = Insert(y[0][index0], Extract(r, 0), 0);
				636	y[0][index1] = Insert(y[0][index1], Extract(r, 1), 1);
				637	y[0][index2] = Insert(y[0][index2], Extract(r, 2), 2);
				638	y[0][index3] = Insert(y[0][index3], Extract(r, 3), 3);
				639	}
				640
				641	void RegisterFile::scatter_z(Int4 index, RValue<Float4> r)
				642	{
				643	ASSERT(indirectAddressable);
				644
				645	Int index0 = Extract(index, 0);
				646	Int index1 = Extract(index, 1);
				647	Int index2 = Extract(index, 2);
				648	Int index3 = Extract(index, 3);
				649
				650	z[0][index0] = Insert(z[0][index0], Extract(r, 0), 0);
				651	z[0][index1] = Insert(z[0][index1], Extract(r, 1), 1);
				652	z[0][index2] = Insert(z[0][index2], Extract(r, 2), 2);
				653	z[0][index3] = Insert(z[0][index3], Extract(r, 3), 3);
				654	}
				655
				656	void RegisterFile::scatter_w(Int4 index, RValue<Float4> r)
				657	{
				658	ASSERT(indirectAddressable);
				659
				660	Int index0 = Extract(index, 0);
				661	Int index1 = Extract(index, 1);
				662	Int index2 = Extract(index, 2);
				663	Int index3 = Extract(index, 3);
				664
				665	w[0][index0] = Insert(w[0][index0], Extract(r, 0), 0);
				666	w[0][index1] = Insert(w[0][index1], Extract(r, 1), 1);
				667	w[0][index2] = Insert(w[0][index2], Extract(r, 2), 2);
				668	w[0][index3] = Insert(w[0][index3], Extract(r, 3), 3);
				669	}
				670
Alexis Hetu	02a2bb8	2015-08-20 14:10:33 -0400	[diff] [blame]	671	void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	672	{
Alexis Hetu	02a2bb8	2015-08-20 14:10:33 -0400	[diff] [blame]	673	if(integerDestination)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	674	{
Alexis Hetu	02a2bb8	2015-08-20 14:10:33 -0400	[diff] [blame]	675	dst.x = As<Float4>(RoundInt(src.x));
				676	dst.y = As<Float4>(RoundInt(src.y));
				677	dst.z = As<Float4>(RoundInt(src.z));
				678	dst.w = As<Float4>(RoundInt(src.w));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	679	}
				680	else
				681	{
				682	dst = src;
				683	}
				684	}
				685
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	686	void ShaderCore::neg(Vector4f &dst, const Vector4f &src)
				687	{
				688	dst.x = -src.x;
				689	dst.y = -src.y;
				690	dst.z = -src.z;
				691	dst.w = -src.w;
				692	}
				693
				694	void ShaderCore::ineg(Vector4f &dst, const Vector4f &src)
				695	{
				696	dst.x = As<Float4>(-As<Int4>(src.x));
				697	dst.y = As<Float4>(-As<Int4>(src.y));
				698	dst.z = As<Float4>(-As<Int4>(src.z));
				699	dst.w = As<Float4>(-As<Int4>(src.w));
				700	}
				701
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	702	void ShaderCore::f2b(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	703	{
				704	dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f)));
				705	dst.y = As<Float4>(CmpNEQ(src.y, Float4(0.0f)));
				706	dst.z = As<Float4>(CmpNEQ(src.z, Float4(0.0f)));
				707	dst.w = As<Float4>(CmpNEQ(src.w, Float4(0.0f)));
				708	}
				709
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	710	void ShaderCore::b2f(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	711	{
				712	dst.x = As<Float4>(As<Int4>(src.x) & As<Int4>(Float4(1.0f)));
				713	dst.y = As<Float4>(As<Int4>(src.y) & As<Int4>(Float4(1.0f)));
				714	dst.z = As<Float4>(As<Int4>(src.z) & As<Int4>(Float4(1.0f)));
				715	dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f)));
				716	}
				717
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	718	void ShaderCore::f2i(Vector4f &dst, const Vector4f &src)
				719	{
				720	dst.x = As<Float4>(Int4(src.x));
				721	dst.y = As<Float4>(Int4(src.y));
				722	dst.z = As<Float4>(Int4(src.z));
				723	dst.w = As<Float4>(Int4(src.w));
				724	}
				725
				726	void ShaderCore::i2f(Vector4f &dst, const Vector4f &src)
				727	{
				728	dst.x = Float4(As<Int4>(src.x));
				729	dst.y = Float4(As<Int4>(src.y));
				730	dst.z = Float4(As<Int4>(src.z));
				731	dst.w = Float4(As<Int4>(src.w));
				732	}
				733
				734	void ShaderCore::f2u(Vector4f &dst, const Vector4f &src)
				735	{
				736	dst.x = As<Float4>(UInt4(src.x));
				737	dst.y = As<Float4>(UInt4(src.y));
				738	dst.z = As<Float4>(UInt4(src.z));
				739	dst.w = As<Float4>(UInt4(src.w));
				740	}
				741
				742	void ShaderCore::u2f(Vector4f &dst, const Vector4f &src)
				743	{
				744	dst.x = Float4(As<UInt4>(src.x));
				745	dst.y = Float4(As<UInt4>(src.y));
				746	dst.z = Float4(As<UInt4>(src.z));
				747	dst.w = Float4(As<UInt4>(src.w));
				748	}
				749
				750	void ShaderCore::i2b(Vector4f &dst, const Vector4f &src)
				751	{
				752	dst.x = As<Float4>(CmpNEQ(As<Int4>(src.x), Int4(0)));
				753	dst.y = As<Float4>(CmpNEQ(As<Int4>(src.y), Int4(0)));
				754	dst.z = As<Float4>(CmpNEQ(As<Int4>(src.z), Int4(0)));
				755	dst.w = As<Float4>(CmpNEQ(As<Int4>(src.w), Int4(0)));
				756	}
				757
				758	void ShaderCore::b2i(Vector4f &dst, const Vector4f &src)
				759	{
				760	dst.x = As<Float4>(As<Int4>(src.x) & Int4(1));
				761	dst.y = As<Float4>(As<Int4>(src.y) & Int4(1));
				762	dst.z = As<Float4>(As<Int4>(src.z) & Int4(1));
				763	dst.w = As<Float4>(As<Int4>(src.w) & Int4(1));
				764	}
				765
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	766	void ShaderCore::add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	767	{
				768	dst.x = src0.x + src1.x;
				769	dst.y = src0.y + src1.y;
				770	dst.z = src0.z + src1.z;
				771	dst.w = src0.w + src1.w;
				772	}
				773
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	774	void ShaderCore::iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				775	{
				776	dst.x = As<Float4>(As<Int4>(src0.x) + As<Int4>(src1.x));
				777	dst.y = As<Float4>(As<Int4>(src0.y) + As<Int4>(src1.y));
				778	dst.z = As<Float4>(As<Int4>(src0.z) + As<Int4>(src1.z));
				779	dst.w = As<Float4>(As<Int4>(src0.w) + As<Int4>(src1.w));
				780	}
				781
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	782	void ShaderCore::sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	783	{
				784	dst.x = src0.x - src1.x;
				785	dst.y = src0.y - src1.y;
				786	dst.z = src0.z - src1.z;
				787	dst.w = src0.w - src1.w;
				788	}
				789
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	790	void ShaderCore::isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				791	{
				792	dst.x = As<Float4>(As<Int4>(src0.x) - As<Int4>(src1.x));
				793	dst.y = As<Float4>(As<Int4>(src0.y) - As<Int4>(src1.y));
				794	dst.z = As<Float4>(As<Int4>(src0.z) - As<Int4>(src1.z));
				795	dst.w = As<Float4>(As<Int4>(src0.w) - As<Int4>(src1.w));
				796	}
				797
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	798	void ShaderCore::mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	799	{
				800	dst.x = src0.x * src1.x + src2.x;
				801	dst.y = src0.y * src1.y + src2.y;
				802	dst.z = src0.z * src1.z + src2.z;
				803	dst.w = src0.w * src1.w + src2.w;
				804	}
				805
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	806	void ShaderCore::imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
				807	{
				808	dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x) + As<Int4>(src2.x));
				809	dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y) + As<Int4>(src2.y));
				810	dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z) + As<Int4>(src2.z));
				811	dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w) + As<Int4>(src2.w));
				812	}
				813
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	814	void ShaderCore::mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	815	{
				816	dst.x = src0.x * src1.x;
				817	dst.y = src0.y * src1.y;
				818	dst.z = src0.z * src1.z;
				819	dst.w = src0.w * src1.w;
				820	}
				821
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	822	void ShaderCore::imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				823	{
				824	dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x));
				825	dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y));
				826	dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z));
				827	dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w));
				828	}
				829
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	830	void ShaderCore::rcpx(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	831	{
Nicolas Capens	af13df4	2018-01-09 16:27:15 -0500	[diff] [blame]	832	Float4 rcp = reciprocal(src.x, pp, true, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	833
				834	dst.x = rcp;
				835	dst.y = rcp;
				836	dst.z = rcp;
				837	dst.w = rcp;
				838	}
				839
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	840	void ShaderCore::div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	841	{
				842	dst.x = src0.x / src1.x;
				843	dst.y = src0.y / src1.y;
				844	dst.z = src0.z / src1.z;
				845	dst.w = src0.w / src1.w;
				846	}
				847
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	848	void ShaderCore::idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				849	{
				850	Float4 intMax(As<Float4>(Int4(INT_MAX)));
				851	cmp0i(dst.x, src1.x, intMax, src1.x);
				852	dst.x = As<Float4>(As<Int4>(src0.x) / As<Int4>(dst.x));
				853	cmp0i(dst.y, src1.y, intMax, src1.y);
				854	dst.y = As<Float4>(As<Int4>(src0.y) / As<Int4>(dst.y));
				855	cmp0i(dst.z, src1.z, intMax, src1.z);
				856	dst.z = As<Float4>(As<Int4>(src0.z) / As<Int4>(dst.z));
				857	cmp0i(dst.w, src1.w, intMax, src1.w);
				858	dst.w = As<Float4>(As<Int4>(src0.w) / As<Int4>(dst.w));
				859	}
				860
				861	void ShaderCore::udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				862	{
				863	Float4 uintMax(As<Float4>(UInt4(UINT_MAX)));
				864	cmp0i(dst.x, src1.x, uintMax, src1.x);
				865	dst.x = As<Float4>(As<UInt4>(src0.x) / As<UInt4>(dst.x));
				866	cmp0i(dst.y, src1.y, uintMax, src1.y);
				867	dst.y = As<Float4>(As<UInt4>(src0.y) / As<UInt4>(dst.y));
				868	cmp0i(dst.z, src1.z, uintMax, src1.z);
				869	dst.z = As<Float4>(As<UInt4>(src0.z) / As<UInt4>(dst.z));
				870	cmp0i(dst.w, src1.w, uintMax, src1.w);
				871	dst.w = As<Float4>(As<UInt4>(src0.w) / As<UInt4>(dst.w));
				872	}
				873
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	874	void ShaderCore::mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	875	{
				876	dst.x = modulo(src0.x, src1.x);
				877	dst.y = modulo(src0.y, src1.y);
				878	dst.z = modulo(src0.z, src1.z);
				879	dst.w = modulo(src0.w, src1.w);
				880	}
				881
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	882	void ShaderCore::imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				883	{
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	884	Float4 intMax(As<Float4>(Int4(INT_MAX)));
				885	cmp0i(dst.x, src1.x, intMax, src1.x);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	886	dst.x = As<Float4>(As<Int4>(src0.x) % As<Int4>(dst.x));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	887	cmp0i(dst.y, src1.y, intMax, src1.y);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	888	dst.y = As<Float4>(As<Int4>(src0.y) % As<Int4>(dst.y));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	889	cmp0i(dst.z, src1.z, intMax, src1.z);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	890	dst.z = As<Float4>(As<Int4>(src0.z) % As<Int4>(dst.z));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	891	cmp0i(dst.w, src1.w, intMax, src1.w);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	892	dst.w = As<Float4>(As<Int4>(src0.w) % As<Int4>(dst.w));
				893	}
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	894
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	895	void ShaderCore::umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				896	{
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	897	Float4 uintMax(As<Float4>(UInt4(UINT_MAX)));
				898	cmp0i(dst.x, src1.x, uintMax, src1.x);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	899	dst.x = As<Float4>(As<UInt4>(src0.x) % As<UInt4>(dst.x));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	900	cmp0i(dst.y, src1.y, uintMax, src1.y);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	901	dst.y = As<Float4>(As<UInt4>(src0.y) % As<UInt4>(dst.y));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	902	cmp0i(dst.z, src1.z, uintMax, src1.z);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	903	dst.z = As<Float4>(As<UInt4>(src0.z) % As<UInt4>(dst.z));
Alexis Hetu	2895810	2017-10-02 13:48:19 -0400	[diff] [blame]	904	cmp0i(dst.w, src1.w, uintMax, src1.w);
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	905	dst.w = As<Float4>(As<UInt4>(src0.w) % As<UInt4>(dst.w));
				906	}
				907
				908	void ShaderCore::shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				909	{
				910	dst.x = As<Float4>(As<Int4>(src0.x) << As<Int4>(src1.x));
				911	dst.y = As<Float4>(As<Int4>(src0.y) << As<Int4>(src1.y));
				912	dst.z = As<Float4>(As<Int4>(src0.z) << As<Int4>(src1.z));
				913	dst.w = As<Float4>(As<Int4>(src0.w) << As<Int4>(src1.w));
				914	}
				915
				916	void ShaderCore::ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				917	{
				918	dst.x = As<Float4>(As<Int4>(src0.x) >> As<Int4>(src1.x));
				919	dst.y = As<Float4>(As<Int4>(src0.y) >> As<Int4>(src1.y));
				920	dst.z = As<Float4>(As<Int4>(src0.z) >> As<Int4>(src1.z));
				921	dst.w = As<Float4>(As<Int4>(src0.w) >> As<Int4>(src1.w));
				922	}
				923
				924	void ShaderCore::ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				925	{
				926	dst.x = As<Float4>(As<UInt4>(src0.x) >> As<UInt4>(src1.x));
				927	dst.y = As<Float4>(As<UInt4>(src0.y) >> As<UInt4>(src1.y));
				928	dst.z = As<Float4>(As<UInt4>(src0.z) >> As<UInt4>(src1.z));
				929	dst.w = As<Float4>(As<UInt4>(src0.w) >> As<UInt4>(src1.w));
				930	}
				931
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	932	void ShaderCore::rsqx(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	933	{
				934	Float4 rsq = reciprocalSquareRoot(src.x, true, pp);
				935
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	936	dst.x = rsq;
				937	dst.y = rsq;
				938	dst.z = rsq;
				939	dst.w = rsq;
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	940	}
				941
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	942	void ShaderCore::sqrt(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	943	{
				944	dst.x = Sqrt(src.x);
				945	dst.y = Sqrt(src.y);
				946	dst.z = Sqrt(src.z);
				947	dst.w = Sqrt(src.w);
				948	}
				949
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	950	void ShaderCore::rsq(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	951	{
				952	dst.x = reciprocalSquareRoot(src.x, false, pp);
				953	dst.y = reciprocalSquareRoot(src.y, false, pp);
				954	dst.z = reciprocalSquareRoot(src.z, false, pp);
				955	dst.w = reciprocalSquareRoot(src.w, false, pp);
				956	}
				957
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	958	void ShaderCore::len2(Float4 &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	959	{
				960	dst = Sqrt(dot2(src, src));
				961	}
				962
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	963	void ShaderCore::len3(Float4 &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	964	{
				965	dst = Sqrt(dot3(src, src));
				966	}
				967
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	968	void ShaderCore::len4(Float4 &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	969	{
				970	dst = Sqrt(dot4(src, src));
				971	}
				972
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	973	void ShaderCore::dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	974	{
				975	dst = Abs(src0.x - src1.x);
				976	}
				977
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	978	void ShaderCore::dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	979	{
				980	Float4 dx = src0.x - src1.x;
				981	Float4 dy = src0.y - src1.y;
				982	Float4 dot2 = dx * dx + dy * dy;
				983	dst = Sqrt(dot2);
				984	}
				985
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	986	void ShaderCore::dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	987	{
				988	Float4 dx = src0.x - src1.x;
				989	Float4 dy = src0.y - src1.y;
				990	Float4 dz = src0.z - src1.z;
				991	Float4 dot3 = dx * dx + dy * dy + dz * dz;
				992	dst = Sqrt(dot3);
				993	}
				994
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	995	void ShaderCore::dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	996	{
				997	Float4 dx = src0.x - src1.x;
				998	Float4 dy = src0.y - src1.y;
				999	Float4 dz = src0.z - src1.z;
				1000	Float4 dw = src0.w - src1.w;
				1001	Float4 dot4 = dx * dx + dy * dy + dz * dz + dw * dw;
				1002	dst = Sqrt(dot4);
				1003	}
				1004
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1005	void ShaderCore::dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1006	{
				1007	Float4 t = src0.x * src1.x;
				1008
				1009	dst.x = t;
				1010	dst.y = t;
				1011	dst.z = t;
				1012	dst.w = t;
				1013	}
				1014
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1015	void ShaderCore::dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1016	{
				1017	Float4 t = dot2(src0, src1);
				1018
				1019	dst.x = t;
				1020	dst.y = t;
				1021	dst.z = t;
				1022	dst.w = t;
				1023	}
				1024
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1025	void ShaderCore::dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1026	{
				1027	Float4 t = dot2(src0, src1) + src2.x;
				1028
				1029	dst.x = t;
				1030	dst.y = t;
				1031	dst.z = t;
				1032	dst.w = t;
				1033	}
				1034
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1035	void ShaderCore::dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1036	{
				1037	Float4 dot = dot3(src0, src1);
				1038
				1039	dst.x = dot;
				1040	dst.y = dot;
				1041	dst.z = dot;
				1042	dst.w = dot;
				1043	}
				1044
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1045	void ShaderCore::dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1046	{
				1047	Float4 dot = dot4(src0, src1);
				1048
				1049	dst.x = dot;
				1050	dst.y = dot;
				1051	dst.z = dot;
				1052	dst.w = dot;
				1053	}
				1054
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1055	void ShaderCore::min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1056	{
				1057	dst.x = Min(src0.x, src1.x);
				1058	dst.y = Min(src0.y, src1.y);
				1059	dst.z = Min(src0.z, src1.z);
				1060	dst.w = Min(src0.w, src1.w);
				1061	}
				1062
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1063	void ShaderCore::imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1064	{
				1065	dst.x = As<Float4>(Min(As<Int4>(src0.x), As<Int4>(src1.x)));
				1066	dst.y = As<Float4>(Min(As<Int4>(src0.y), As<Int4>(src1.y)));
				1067	dst.z = As<Float4>(Min(As<Int4>(src0.z), As<Int4>(src1.z)));
				1068	dst.w = As<Float4>(Min(As<Int4>(src0.w), As<Int4>(src1.w)));
				1069	}
				1070
				1071	void ShaderCore::umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1072	{
				1073	dst.x = As<Float4>(Min(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1074	dst.y = As<Float4>(Min(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1075	dst.z = As<Float4>(Min(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1076	dst.w = As<Float4>(Min(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1077	}
				1078
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1079	void ShaderCore::max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1080	{
				1081	dst.x = Max(src0.x, src1.x);
				1082	dst.y = Max(src0.y, src1.y);
				1083	dst.z = Max(src0.z, src1.z);
				1084	dst.w = Max(src0.w, src1.w);
				1085	}
				1086
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1087	void ShaderCore::imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1088	{
				1089	dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x)));
				1090	dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y)));
				1091	dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z)));
				1092	dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w)));
				1093	}
				1094
				1095	void ShaderCore::umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1096	{
				1097	dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x)));
				1098	dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y)));
				1099	dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z)));
				1100	dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w)));
				1101	}
				1102
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1103	void ShaderCore::slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1104	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1105	dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f)));
				1106	dst.y = As<Float4>(As<Int4>(CmpLT(src0.y, src1.y)) & As<Int4>(Float4(1.0f)));
				1107	dst.z = As<Float4>(As<Int4>(CmpLT(src0.z, src1.z)) & As<Int4>(Float4(1.0f)));
				1108	dst.w = As<Float4>(As<Int4>(CmpLT(src0.w, src1.w)) & As<Int4>(Float4(1.0f)));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1109	}
				1110
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1111	void ShaderCore::step(Vector4f &dst, const Vector4f &edge, const Vector4f &x)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1112	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1113	dst.x = As<Float4>(CmpNLT(x.x, edge.x) & As<Int4>(Float4(1.0f)));
				1114	dst.y = As<Float4>(CmpNLT(x.y, edge.y) & As<Int4>(Float4(1.0f)));
				1115	dst.z = As<Float4>(CmpNLT(x.z, edge.z) & As<Int4>(Float4(1.0f)));
				1116	dst.w = As<Float4>(CmpNLT(x.w, edge.w) & As<Int4>(Float4(1.0f)));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1117	}
				1118
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1119	void ShaderCore::exp2x(Vector4f &dst, const Vector4f &src, bool pp)
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1120	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1121	Float4 exp = exponential2(src.x, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1122
				1123	dst.x = exp;
				1124	dst.y = exp;
				1125	dst.z = exp;
				1126	dst.w = exp;
				1127	}
				1128
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1129	void ShaderCore::exp2(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1130	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1131	dst.x = exponential2(src.x, pp);
				1132	dst.y = exponential2(src.y, pp);
				1133	dst.z = exponential2(src.z, pp);
				1134	dst.w = exponential2(src.w, pp);
				1135	}
				1136
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1137	void ShaderCore::exp(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1138	{
				1139	dst.x = exponential(src.x, pp);
				1140	dst.y = exponential(src.y, pp);
				1141	dst.z = exponential(src.z, pp);
				1142	dst.w = exponential(src.w, pp);
				1143	}
				1144
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1145	void ShaderCore::log2x(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1146	{
				1147	Float4 log = logarithm2(src.x, true, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1148
				1149	dst.x = log;
				1150	dst.y = log;
				1151	dst.z = log;
				1152	dst.w = log;
				1153	}
				1154
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1155	void ShaderCore::log2(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1156	{
Alexis Hetu	a781af7	2017-07-06 17:12:47 -0400	[diff] [blame]	1157	dst.x = logarithm2(src.x, false, pp);
				1158	dst.y = logarithm2(src.y, false, pp);
				1159	dst.z = logarithm2(src.z, false, pp);
				1160	dst.w = logarithm2(src.w, false, pp);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1161	}
				1162
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1163	void ShaderCore::log(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1164	{
				1165	dst.x = logarithm(src.x, false, pp);
				1166	dst.y = logarithm(src.y, false, pp);
				1167	dst.z = logarithm(src.z, false, pp);
				1168	dst.w = logarithm(src.w, false, pp);
				1169	}
				1170
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1171	void ShaderCore::lit(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1172	{
				1173	dst.x = Float4(1.0f);
				1174	dst.y = Max(src.x, Float4(0.0f));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1175
				1176	Float4 pow;
				1177
				1178	pow = src.w;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1179	pow = Min(pow, Float4(127.9961f));
				1180	pow = Max(pow, Float4(-127.9961f));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1181
				1182	dst.z = power(src.y, pow);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1183	dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f)));
				1184	dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f)));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1185
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1186	dst.w = Float4(1.0f);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1187	}
				1188
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1189	void ShaderCore::att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1190	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1191	// Computes attenuation factors (1, d, d^2, 1/d) assuming src0 = d^2, src1 = 1/d
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1192	dst.x = 1;
				1193	dst.y = src0.y * src1.y;
				1194	dst.z = src0.z;
				1195	dst.w = src1.w;
				1196	}
				1197
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1198	void ShaderCore::lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1199	{
				1200	dst.x = src0.x * (src1.x - src2.x) + src2.x;
				1201	dst.y = src0.y * (src1.y - src2.y) + src2.y;
				1202	dst.z = src0.z * (src1.z - src2.z) + src2.z;
				1203	dst.w = src0.w * (src1.w - src2.w) + src2.w;
				1204	}
				1205
Alexis Hetu	8ef6d10	2017-11-09 15:49:09 -0500	[diff] [blame]	1206	void ShaderCore::isinf(Vector4f &dst, const Vector4f &src)
				1207	{
				1208	dst.x = As<Float4>(IsInf(src.x));
				1209	dst.y = As<Float4>(IsInf(src.y));
				1210	dst.z = As<Float4>(IsInf(src.z));
				1211	dst.w = As<Float4>(IsInf(src.w));
				1212	}
				1213
				1214	void ShaderCore::isnan(Vector4f &dst, const Vector4f &src)
				1215	{
				1216	dst.x = As<Float4>(IsNan(src.x));
				1217	dst.y = As<Float4>(IsNan(src.y));
				1218	dst.z = As<Float4>(IsNan(src.z));
				1219	dst.w = As<Float4>(IsNan(src.w));
				1220	}
				1221
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1222	void ShaderCore::smooth(Vector4f &dst, const Vector4f &edge0, const Vector4f &edge1, const Vector4f &x)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1223	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1224	Float4 tx = Min(Max((x.x - edge0.x) / (edge1.x - edge0.x), Float4(0.0f)), Float4(1.0f)); dst.x = tx * tx * (Float4(3.0f) - Float4(2.0f) * tx);
				1225	Float4 ty = Min(Max((x.y - edge0.y) / (edge1.y - edge0.y), Float4(0.0f)), Float4(1.0f)); dst.y = ty * ty * (Float4(3.0f) - Float4(2.0f) * ty);
				1226	Float4 tz = Min(Max((x.z - edge0.z) / (edge1.z - edge0.z), Float4(0.0f)), Float4(1.0f)); dst.z = tz * tz * (Float4(3.0f) - Float4(2.0f) * tz);
				1227	Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1228	}
				1229
Alexis Hetu	ffb35eb	2016-04-06 18:05:00 -0400	[diff] [blame]	1230	void ShaderCore::floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits)
				1231	{
				1232	static const uint32_t mask_sign = 0x80000000u;
				1233	static const uint32_t mask_round = ~0xfffu;
				1234	static const uint32_t c_f32infty = 255 << 23;
				1235	static const uint32_t c_magic = 15 << 23;
				1236	static const uint32_t c_nanbit = 0x200;
				1237	static const uint32_t c_infty_as_fp16 = 0x7c00;
				1238	static const uint32_t c_clamp = (31 << 23) - 0x1000;
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1239
Alexis Hetu	ffb35eb	2016-04-06 18:05:00 -0400	[diff] [blame]	1240	UInt4 justsign = UInt4(mask_sign) & As<UInt4>(floatBits);
				1241	UInt4 absf = As<UInt4>(floatBits) ^ justsign;
				1242	UInt4 b_isnormal = CmpNLE(UInt4(c_f32infty), absf);
				1243
				1244	// Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf
				1245	// instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation)
				1246	UInt4 joined = ((((As<UInt4>(Min(As<Float4>(absf & UInt4(mask_round)) * As<Float4>(UInt4(c_magic)),
				1247	As<Float4>(UInt4(c_clamp))))) - UInt4(mask_round)) >> 13) & b_isnormal) \|
				1248	((b_isnormal ^ UInt4(0xFFFFFFFF)) & ((CmpNLE(absf, UInt4(c_f32infty)) & UInt4(c_nanbit)) \|
				1249	UInt4(c_infty_as_fp16)));
				1250
				1251	dst = As<Float4>(storeInUpperBits ? As<UInt4>(dst) \| ((joined << 16) \| justsign) : joined \| (justsign >> 16));
				1252	}
				1253
				1254	void ShaderCore::halfToFloatBits(Float4& dst, const Float4& halfBits)
				1255	{
				1256	static const uint32_t mask_nosign = 0x7FFF;
				1257	static const uint32_t magic = (254 - 15) << 23;
				1258	static const uint32_t was_infnan = 0x7BFF;
				1259	static const uint32_t exp_infnan = 255 << 23;
				1260
				1261	UInt4 expmant = As<UInt4>(halfBits) & UInt4(mask_nosign);
				1262	dst = As<Float4>(As<UInt4>(As<Float4>(expmant << 13) * As<Float4>(UInt4(magic))) \|
				1263	((As<UInt4>(halfBits) ^ UInt4(expmant)) << 16) \|
				1264	(CmpNLE(As<UInt4>(expmant), UInt4(was_infnan)) & UInt4(exp_infnan)));
				1265	}
				1266
				1267	void ShaderCore::packHalf2x16(Vector4f &d, const Vector4f &s0)
				1268	{
				1269	// half2 \| half1
				1270	floatToHalfBits(d.x, s0.x, false);
				1271	floatToHalfBits(d.x, s0.y, true);
				1272	}
				1273
				1274	void ShaderCore::unpackHalf2x16(Vector4f &dst, const Vector4f &s0)
				1275	{
				1276	// half2 \| half1
				1277	halfToFloatBits(dst.x, As<Float4>(As<UInt4>(s0.x) & UInt4(0x0000FFFF)));
				1278	halfToFloatBits(dst.y, As<Float4>((As<UInt4>(s0.x) & UInt4(0xFFFF0000)) >> 16));
				1279	}
				1280
Alexis Hetu	9cde974	2016-04-06 13:03:38 -0400	[diff] [blame]	1281	void ShaderCore::packSnorm2x16(Vector4f &d, const Vector4f &s0)
				1282	{
				1283	// round(clamp(c, -1.0, 1.0) * 32767.0)
				1284	d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) \|
				1285	((Int4(Round(Min(Max(s0.y, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) << 16));
				1286	}
				1287
				1288	void ShaderCore::packUnorm2x16(Vector4f &d, const Vector4f &s0)
				1289	{
				1290	// round(clamp(c, 0.0, 1.0) * 65535.0)
				1291	d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) \|
				1292	((Int4(Round(Min(Max(s0.y, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) << 16));
				1293	}
				1294
				1295	void ShaderCore::unpackSnorm2x16(Vector4f &dst, const Vector4f &s0)
				1296	{
				1297	// clamp(f / 32727.0, -1.0, 1.0)
				1298	dst.x = Min(Max(Float4(As<Int4>((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16)) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f));
				1299	dst.y = Min(Max(Float4(As<Int4>(As<UInt4>(s0.x) & UInt4(0xFFFF0000))) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f));
				1300	}
				1301
				1302	void ShaderCore::unpackUnorm2x16(Vector4f &dst, const Vector4f &s0)
				1303	{
				1304	// f / 65535.0
				1305	dst.x = Float4((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16) * Float4(1.0f / float(0xFFFF0000));
				1306	dst.y = Float4(As<UInt4>(s0.x) & UInt4(0xFFFF0000)) * Float4(1.0f / float(0xFFFF0000));
				1307	}
				1308
Alexis Hetu	c3d95f3	2015-09-23 12:27:32 -0400	[diff] [blame]	1309	void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				1310	{
				1311	dst.x = src0.x * src1.y - src0.y * src1.x;
				1312	dst.y = dst.z = dst.w = dst.x;
				1313	}
				1314
				1315	void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
				1316	{
				1317	crs(dst, src1, src2);
				1318	dp3(dst, dst, src0);
				1319	}
				1320
				1321	void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3)
				1322	{
				1323	dst.x = src2.z * src3.w - src2.w * src3.z;
				1324	dst.y = src1.w * src3.z - src1.z * src3.w;
				1325	dst.z = src1.z * src2.w - src1.w * src2.z;
				1326	dst.x = src0.x * (src1.y * dst.x + src2.y * dst.y + src3.y * dst.z) -
				1327	src0.y * (src1.x * dst.x + src2.x * dst.y + src3.x * dst.z) +
				1328	src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) +
				1329	src2.x * (src1.w * src3.y - src1.y * src3.w) +
				1330	src3.x * (src1.y * src2.w - src1.w * src2.y)) +
				1331	src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) +
				1332	src2.x * (src1.y * src3.z - src1.z * src3.y) +
				1333	src3.x * (src1.z * src2.y - src1.y * src2.z));
				1334	dst.y = dst.z = dst.w = dst.x;
				1335	}
				1336
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1337	void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1338	{
				1339	dst.x = Frac(src.x);
				1340	dst.y = Frac(src.y);
				1341	dst.z = Frac(src.z);
				1342	dst.w = Frac(src.w);
				1343	}
				1344
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1345	void ShaderCore::trunc(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1346	{
				1347	dst.x = Trunc(src.x);
				1348	dst.y = Trunc(src.y);
				1349	dst.z = Trunc(src.z);
				1350	dst.w = Trunc(src.w);
				1351	}
				1352
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1353	void ShaderCore::floor(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1354	{
				1355	dst.x = Floor(src.x);
				1356	dst.y = Floor(src.y);
				1357	dst.z = Floor(src.z);
				1358	dst.w = Floor(src.w);
				1359	}
				1360
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1361	void ShaderCore::round(Vector4f &dst, const Vector4f &src)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1362	{
				1363	dst.x = Round(src.x);
				1364	dst.y = Round(src.y);
				1365	dst.z = Round(src.z);
				1366	dst.w = Round(src.w);
				1367	}
				1368
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1369	void ShaderCore::roundEven(Vector4f &dst, const Vector4f &src)
Alexis Hetu	8e851c1	2015-06-04 11:30:54 -0400	[diff] [blame]	1370	{
				1371	// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
				1372	// ex.: 1.5: 2 + (0 * 2 - 1) * 1 * 0 = 2
				1373	// 2.5: 3 + (0 * 2 - 1) * 1 * 1 = 2
				1374	// -1.5: -2 + (1 * 2 - 1) * 1 * 0 = -2
				1375	// -2.5: -3 + (1 * 2 - 1) * 1 * 1 = -2
				1376	// Even if the round implementation rounds the other way:
				1377	// 1.5: 1 + (1 * 2 - 1) * 1 * 1 = 2
				1378	// 2.5: 2 + (1 * 2 - 1) * 1 * 0 = 2
				1379	// -1.5: -1 + (0 * 2 - 1) * 1 * 1 = -2
				1380	// -2.5: -2 + (0 * 2 - 1) * 1 * 0 = -2
				1381	round(dst, src);
				1382	dst.x += ((Float4(CmpLT(dst.x, src.x) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.x), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.x) & Int4(1));
				1383	dst.y += ((Float4(CmpLT(dst.y, src.y) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.y), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.y) & Int4(1));
				1384	dst.z += ((Float4(CmpLT(dst.z, src.z) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.z), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.z) & Int4(1));
				1385	dst.w += ((Float4(CmpLT(dst.w, src.w) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.w), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.w) & Int4(1));
				1386	}
				1387
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1388	void ShaderCore::ceil(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1389	{
				1390	dst.x = Ceil(src.x);
				1391	dst.y = Ceil(src.y);
				1392	dst.z = Ceil(src.z);
				1393	dst.w = Ceil(src.w);
				1394	}
				1395
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1396	void ShaderCore::powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1397	{
				1398	Float4 pow = power(src0.x, src1.x, pp);
				1399
				1400	dst.x = pow;
				1401	dst.y = pow;
				1402	dst.z = pow;
				1403	dst.w = pow;
				1404	}
				1405
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1406	void ShaderCore::pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1407	{
				1408	dst.x = power(src0.x, src1.x, pp);
				1409	dst.y = power(src0.y, src1.y, pp);
				1410	dst.z = power(src0.z, src1.z, pp);
				1411	dst.w = power(src0.w, src1.w, pp);
				1412	}
				1413
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1414	void ShaderCore::crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1415	{
				1416	dst.x = src0.y * src1.z - src0.z * src1.y;
				1417	dst.y = src0.z * src1.x - src0.x * src1.z;
				1418	dst.z = src0.x * src1.y - src0.y * src1.x;
				1419	}
				1420
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1421	void ShaderCore::forward1(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1422	{
				1423	Int4 flip = CmpNLT(Nref.x * I.x, Float4(0.0f)) & Int4(0x80000000);
				1424
				1425	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1426	}
				1427
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1428	void ShaderCore::forward2(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1429	{
				1430	Int4 flip = CmpNLT(dot2(Nref, I), Float4(0.0f)) & Int4(0x80000000);
				1431
				1432	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1433	dst.y = As<Float4>(flip ^ As<Int4>(N.y));
				1434	}
				1435
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1436	void ShaderCore::forward3(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1437	{
				1438	Int4 flip = CmpNLT(dot3(Nref, I), Float4(0.0f)) & Int4(0x80000000);
				1439
				1440	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1441	dst.y = As<Float4>(flip ^ As<Int4>(N.y));
				1442	dst.z = As<Float4>(flip ^ As<Int4>(N.z));
				1443	}
				1444
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1445	void ShaderCore::forward4(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1446	{
				1447	Int4 flip = CmpNLT(dot4(Nref, I), Float4(0.0f)) & Int4(0x80000000);
				1448
				1449	dst.x = As<Float4>(flip ^ As<Int4>(N.x));
				1450	dst.y = As<Float4>(flip ^ As<Int4>(N.y));
				1451	dst.z = As<Float4>(flip ^ As<Int4>(N.z));
				1452	dst.w = As<Float4>(flip ^ As<Int4>(N.w));
				1453	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1454
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1455	void ShaderCore::reflect1(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1456	{
				1457	Float4 d = N.x * I.x;
				1458
				1459	dst.x = I.x - Float4(2.0f) * d * N.x;
				1460	}
				1461
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1462	void ShaderCore::reflect2(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1463	{
				1464	Float4 d = dot2(N, I);
				1465
				1466	dst.x = I.x - Float4(2.0f) * d * N.x;
				1467	dst.y = I.y - Float4(2.0f) * d * N.y;
				1468	}
				1469
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1470	void ShaderCore::reflect3(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1471	{
				1472	Float4 d = dot3(N, I);
				1473
				1474	dst.x = I.x - Float4(2.0f) * d * N.x;
				1475	dst.y = I.y - Float4(2.0f) * d * N.y;
				1476	dst.z = I.z - Float4(2.0f) * d * N.z;
				1477	}
				1478
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1479	void ShaderCore::reflect4(Vector4f &dst, const Vector4f &I, const Vector4f &N)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1480	{
				1481	Float4 d = dot4(N, I);
				1482
				1483	dst.x = I.x - Float4(2.0f) * d * N.x;
				1484	dst.y = I.y - Float4(2.0f) * d * N.y;
				1485	dst.z = I.z - Float4(2.0f) * d * N.z;
				1486	dst.w = I.w - Float4(2.0f) * d * N.w;
				1487	}
				1488
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1489	void ShaderCore::refract1(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1490	{
				1491	Float4 d = N.x * I.x;
				1492	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1493	Int4 pos = CmpNLT(k, Float4(0.0f));
				1494	Float4 t = (eta * d + Sqrt(k));
				1495
				1496	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1497	}
				1498
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1499	void ShaderCore::refract2(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1500	{
				1501	Float4 d = dot2(N, I);
				1502	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1503	Int4 pos = CmpNLT(k, Float4(0.0f));
				1504	Float4 t = (eta * d + Sqrt(k));
				1505
				1506	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1507	dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
				1508	}
				1509
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1510	void ShaderCore::refract3(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1511	{
				1512	Float4 d = dot3(N, I);
				1513	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1514	Int4 pos = CmpNLT(k, Float4(0.0f));
				1515	Float4 t = (eta * d + Sqrt(k));
				1516
				1517	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1518	dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
				1519	dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z));
				1520	}
				1521
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1522	void ShaderCore::refract4(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1523	{
				1524	Float4 d = dot4(N, I);
				1525	Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d);
				1526	Int4 pos = CmpNLT(k, Float4(0.0f));
				1527	Float4 t = (eta * d + Sqrt(k));
				1528
				1529	dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x));
				1530	dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y));
				1531	dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z));
				1532	dst.w = As<Float4>(pos & As<Int4>(eta * I.w - t * N.w));
				1533	}
				1534
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1535	void ShaderCore::sgn(Vector4f &dst, const Vector4f &src)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1536	{
				1537	sgn(dst.x, src.x);
				1538	sgn(dst.y, src.y);
				1539	sgn(dst.z, src.z);
				1540	sgn(dst.w, src.w);
				1541	}
				1542
Alexis Hetu	0f44807	2016-03-18 10:56:08 -0400	[diff] [blame]	1543	void ShaderCore::isgn(Vector4f &dst, const Vector4f &src)
				1544	{
				1545	isgn(dst.x, src.x);
				1546	isgn(dst.y, src.y);
				1547	isgn(dst.z, src.z);
				1548	isgn(dst.w, src.w);
				1549	}
				1550
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1551	void ShaderCore::abs(Vector4f &dst, const Vector4f &src)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1552	{
				1553	dst.x = Abs(src.x);
				1554	dst.y = Abs(src.y);
				1555	dst.z = Abs(src.z);
				1556	dst.w = Abs(src.w);
				1557	}
Alexis Hetu	0f44807	2016-03-18 10:56:08 -0400	[diff] [blame]	1558
				1559	void ShaderCore::iabs(Vector4f &dst, const Vector4f &src)
				1560	{
				1561	dst.x = As<Float4>(Abs(As<Int4>(src.x)));
				1562	dst.y = As<Float4>(Abs(As<Int4>(src.y)));
				1563	dst.z = As<Float4>(Abs(As<Int4>(src.z)));
				1564	dst.w = As<Float4>(Abs(As<Int4>(src.w)));
				1565	}
				1566
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1567	void ShaderCore::nrm2(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1568	{
				1569	Float4 dot = dot2(src, src);
				1570	Float4 rsq = reciprocalSquareRoot(dot, false, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1571
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1572	dst.x = src.x * rsq;
				1573	dst.y = src.y * rsq;
				1574	dst.z = src.z * rsq;
				1575	dst.w = src.w * rsq;
				1576	}
				1577
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1578	void ShaderCore::nrm3(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1579	{
				1580	Float4 dot = dot3(src, src);
				1581	Float4 rsq = reciprocalSquareRoot(dot, false, pp);
				1582
				1583	dst.x = src.x * rsq;
				1584	dst.y = src.y * rsq;
				1585	dst.z = src.z * rsq;
				1586	dst.w = src.w * rsq;
				1587	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1588
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1589	void ShaderCore::nrm4(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1590	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1591	Float4 dot = dot4(src, src);
				1592	Float4 rsq = reciprocalSquareRoot(dot, false, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1593
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1594	dst.x = src.x * rsq;
				1595	dst.y = src.y * rsq;
				1596	dst.z = src.z * rsq;
				1597	dst.w = src.w * rsq;
				1598	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1599
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1600	void ShaderCore::sincos(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1601	{
				1602	dst.x = cosine_pi(src.x, pp);
				1603	dst.y = sine_pi(src.x, pp);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1604	}
				1605
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1606	void ShaderCore::cos(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1607	{
				1608	dst.x = cosine(src.x, pp);
				1609	dst.y = cosine(src.y, pp);
				1610	dst.z = cosine(src.z, pp);
				1611	dst.w = cosine(src.w, pp);
				1612	}
				1613
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1614	void ShaderCore::sin(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1615	{
				1616	dst.x = sine(src.x, pp);
				1617	dst.y = sine(src.y, pp);
				1618	dst.z = sine(src.z, pp);
				1619	dst.w = sine(src.w, pp);
				1620	}
				1621
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1622	void ShaderCore::tan(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1623	{
				1624	dst.x = tangent(src.x, pp);
				1625	dst.y = tangent(src.y, pp);
				1626	dst.z = tangent(src.z, pp);
				1627	dst.w = tangent(src.w, pp);
				1628	}
				1629
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1630	void ShaderCore::acos(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1631	{
				1632	dst.x = arccos(src.x, pp);
				1633	dst.y = arccos(src.y, pp);
				1634	dst.z = arccos(src.z, pp);
				1635	dst.w = arccos(src.w, pp);
				1636	}
				1637
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1638	void ShaderCore::asin(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1639	{
				1640	dst.x = arcsin(src.x, pp);
				1641	dst.y = arcsin(src.y, pp);
				1642	dst.z = arcsin(src.z, pp);
				1643	dst.w = arcsin(src.w, pp);
				1644	}
				1645
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1646	void ShaderCore::atan(Vector4f &dst, const Vector4f &src, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1647	{
				1648	dst.x = arctan(src.x, pp);
				1649	dst.y = arctan(src.y, pp);
				1650	dst.z = arctan(src.z, pp);
				1651	dst.w = arctan(src.w, pp);
				1652	}
				1653
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1654	void ShaderCore::atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1655	{
				1656	dst.x = arctan(src0.x, src1.x, pp);
				1657	dst.y = arctan(src0.y, src1.y, pp);
				1658	dst.z = arctan(src0.z, src1.z, pp);
				1659	dst.w = arctan(src0.w, src1.w, pp);
				1660	}
				1661
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1662	void ShaderCore::cosh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1663	{
				1664	dst.x = cosineh(src.x, pp);
				1665	dst.y = cosineh(src.y, pp);
				1666	dst.z = cosineh(src.z, pp);
				1667	dst.w = cosineh(src.w, pp);
				1668	}
				1669
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1670	void ShaderCore::sinh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1671	{
				1672	dst.x = sineh(src.x, pp);
				1673	dst.y = sineh(src.y, pp);
				1674	dst.z = sineh(src.z, pp);
				1675	dst.w = sineh(src.w, pp);
				1676	}
				1677
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1678	void ShaderCore::tanh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1679	{
				1680	dst.x = tangenth(src.x, pp);
				1681	dst.y = tangenth(src.y, pp);
				1682	dst.z = tangenth(src.z, pp);
				1683	dst.w = tangenth(src.w, pp);
				1684	}
				1685
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1686	void ShaderCore::acosh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1687	{
				1688	dst.x = arccosh(src.x, pp);
				1689	dst.y = arccosh(src.y, pp);
				1690	dst.z = arccosh(src.z, pp);
				1691	dst.w = arccosh(src.w, pp);
				1692	}
				1693
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1694	void ShaderCore::asinh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1695	{
				1696	dst.x = arcsinh(src.x, pp);
				1697	dst.y = arcsinh(src.y, pp);
				1698	dst.z = arcsinh(src.z, pp);
				1699	dst.w = arcsinh(src.w, pp);
				1700	}
				1701
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1702	void ShaderCore::atanh(Vector4f &dst, const Vector4f &src, bool pp)
Alexis Hetu	8b5f3ef	2015-04-16 11:33:34 -0400	[diff] [blame]	1703	{
				1704	dst.x = arctanh(src.x, pp);
				1705	dst.y = arctanh(src.y, pp);
				1706	dst.z = arctanh(src.z, pp);
				1707	dst.w = arctanh(src.w, pp);
				1708	}
				1709
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1710	void ShaderCore::expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1711	{
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1712	if(shaderModel < 0x0200)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1713	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1714	Float4 frc = Frac(src.x);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1715	Float4 floor = src.x - frc;
				1716
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1717	dst.x = exponential2(floor, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1718	dst.y = frc;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1719	dst.z = exponential2(src.x, true);
				1720	dst.w = Float4(1.0f);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1721	}
				1722	else // Version >= 2.0
				1723	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1724	exp2x(dst, src, true); // FIXME: 10-bit precision suffices
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1725	}
				1726	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1727
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1728	void ShaderCore::logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1729	{
Alexis Hetu	53ad4af	2017-12-06 14:49:07 -0500	[diff] [blame]	1730	if(shaderModel < 0x0200)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1731	{
				1732	Float4 tmp0;
				1733	Float4 tmp1;
				1734	Float4 t;
				1735	Int4 r;
				1736
				1737	tmp0 = Abs(src.x);
				1738	tmp1 = tmp0;
				1739
				1740	// X component
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1741	r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1742	dst.x = Float4(r);
				1743
				1744	// Y component
				1745	dst.y = As<Float4>((As<Int4>(tmp1) & Int4(0x007FFFFF)) \| As<Int4>(Float4(1.0f)));
				1746
				1747	// Z component
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1748	dst.z = logarithm2(src.x, true, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1749
				1750	// W component
				1751	dst.w = 1.0f;
				1752	}
				1753	else
				1754	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1755	log2x(dst, src, true);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1756	}
				1757	}
Nicolas Capens	0bac285	2016-05-07 06:09:58 -0400	[diff] [blame]	1758
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1759	void ShaderCore::cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1760	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1761	cmp0(dst.x, src0.x, src1.x, src2.x);
				1762	cmp0(dst.y, src0.y, src1.y, src2.y);
				1763	cmp0(dst.z, src0.z, src1.z, src2.z);
				1764	cmp0(dst.w, src0.w, src1.w, src2.w);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1765	}
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1766
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1767	void ShaderCore::select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1768	{
				1769	select(dst.x, As<Int4>(src0.x), src1.x, src2.x);
				1770	select(dst.y, As<Int4>(src0.y), src1.y, src2.y);
				1771	select(dst.z, As<Int4>(src0.z), src1.z, src2.z);
				1772	select(dst.w, As<Int4>(src0.w), src1.w, src2.w);
				1773	}
				1774
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1775	void ShaderCore::extract(Float4 &dst, const Vector4f &src0, const Float4 &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1776	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1777	select(dst, CmpEQ(As<Int4>(src1), Int4(1)), src0.y, src0.x);
				1778	select(dst, CmpEQ(As<Int4>(src1), Int4(2)), src0.z, dst);
				1779	select(dst, CmpEQ(As<Int4>(src1), Int4(3)), src0.w, dst);
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1780	}
				1781
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1782	void ShaderCore::insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1783	{
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1784	select(dst.x, CmpEQ(As<Int4>(index), Int4(0)), element, src.x);
				1785	select(dst.y, CmpEQ(As<Int4>(index), Int4(1)), element, src.y);
				1786	select(dst.z, CmpEQ(As<Int4>(index), Int4(2)), element, src.z);
				1787	select(dst.w, CmpEQ(As<Int4>(index), Int4(3)), element, src.w);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1788	}
				1789
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1790	void ShaderCore::sgn(Float4 &dst, const Float4 &src)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1791	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1792	Int4 neg = As<Int4>(CmpLT(src, Float4(-0.0f))) & As<Int4>(Float4(-1.0f));
				1793	Int4 pos = As<Int4>(CmpNLE(src, Float4(+0.0f))) & As<Int4>(Float4(1.0f));
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1794	dst = As<Float4>(neg \| pos);
				1795	}
				1796
Alexis Hetu	0f44807	2016-03-18 10:56:08 -0400	[diff] [blame]	1797	void ShaderCore::isgn(Float4 &dst, const Float4 &src)
				1798	{
				1799	Int4 neg = CmpLT(As<Int4>(src), Int4(0)) & Int4(-1);
				1800	Int4 pos = CmpNLE(As<Int4>(src), Int4(0)) & Int4(1);
				1801	dst = As<Float4>(neg \| pos);
				1802	}
				1803
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1804	void ShaderCore::cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1805	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1806	Int4 pos = CmpLE(Float4(0.0f), src0);
				1807	select(dst, pos, src1, src2);
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1808	}
				1809
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1810	void ShaderCore::cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2)
				1811	{
				1812	Int4 pos = CmpEQ(Int4(0), As<Int4>(src0));
				1813	select(dst, pos, src1, src2);
				1814	}
				1815
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1816	void ShaderCore::select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1817	{
				1818	// FIXME: LLVM vector select
Tom Anderson	69bc6e8	2017-03-20 11:54:29 -0700	[diff] [blame]	1819	dst = As<Float4>((src0 & As<Int4>(src1)) \| (~src0 & As<Int4>(src2)));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1820	}
				1821
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1822	void ShaderCore::cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1823	{
				1824	switch(control)
				1825	{
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1826	case Shader::CONTROL_GT:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1827	dst.x = As<Float4>(CmpNLE(src0.x, src1.x));
				1828	dst.y = As<Float4>(CmpNLE(src0.y, src1.y));
				1829	dst.z = As<Float4>(CmpNLE(src0.z, src1.z));
				1830	dst.w = As<Float4>(CmpNLE(src0.w, src1.w));
				1831	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1832	case Shader::CONTROL_EQ:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1833	dst.x = As<Float4>(CmpEQ(src0.x, src1.x));
				1834	dst.y = As<Float4>(CmpEQ(src0.y, src1.y));
				1835	dst.z = As<Float4>(CmpEQ(src0.z, src1.z));
				1836	dst.w = As<Float4>(CmpEQ(src0.w, src1.w));
				1837	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1838	case Shader::CONTROL_GE:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1839	dst.x = As<Float4>(CmpNLT(src0.x, src1.x));
				1840	dst.y = As<Float4>(CmpNLT(src0.y, src1.y));
				1841	dst.z = As<Float4>(CmpNLT(src0.z, src1.z));
				1842	dst.w = As<Float4>(CmpNLT(src0.w, src1.w));
				1843	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1844	case Shader::CONTROL_LT:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1845	dst.x = As<Float4>(CmpLT(src0.x, src1.x));
				1846	dst.y = As<Float4>(CmpLT(src0.y, src1.y));
				1847	dst.z = As<Float4>(CmpLT(src0.z, src1.z));
				1848	dst.w = As<Float4>(CmpLT(src0.w, src1.w));
				1849	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1850	case Shader::CONTROL_NE:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1851	dst.x = As<Float4>(CmpNEQ(src0.x, src1.x));
				1852	dst.y = As<Float4>(CmpNEQ(src0.y, src1.y));
				1853	dst.z = As<Float4>(CmpNEQ(src0.z, src1.z));
				1854	dst.w = As<Float4>(CmpNEQ(src0.w, src1.w));
				1855	break;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1856	case Shader::CONTROL_LE:
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1857	dst.x = As<Float4>(CmpLE(src0.x, src1.x));
				1858	dst.y = As<Float4>(CmpLE(src0.y, src1.y));
				1859	dst.z = As<Float4>(CmpLE(src0.z, src1.z));
				1860	dst.w = As<Float4>(CmpLE(src0.w, src1.w));
				1861	break;
				1862	default:
				1863	ASSERT(false);
				1864	}
				1865	}
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1866
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1867	void ShaderCore::icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1868	{
				1869	switch(control)
				1870	{
				1871	case Shader::CONTROL_GT:
				1872	dst.x = As<Float4>(CmpNLE(As<Int4>(src0.x), As<Int4>(src1.x)));
				1873	dst.y = As<Float4>(CmpNLE(As<Int4>(src0.y), As<Int4>(src1.y)));
				1874	dst.z = As<Float4>(CmpNLE(As<Int4>(src0.z), As<Int4>(src1.z)));
				1875	dst.w = As<Float4>(CmpNLE(As<Int4>(src0.w), As<Int4>(src1.w)));
				1876	break;
				1877	case Shader::CONTROL_EQ:
				1878	dst.x = As<Float4>(CmpEQ(As<Int4>(src0.x), As<Int4>(src1.x)));
				1879	dst.y = As<Float4>(CmpEQ(As<Int4>(src0.y), As<Int4>(src1.y)));
				1880	dst.z = As<Float4>(CmpEQ(As<Int4>(src0.z), As<Int4>(src1.z)));
				1881	dst.w = As<Float4>(CmpEQ(As<Int4>(src0.w), As<Int4>(src1.w)));
				1882	break;
				1883	case Shader::CONTROL_GE:
				1884	dst.x = As<Float4>(CmpNLT(As<Int4>(src0.x), As<Int4>(src1.x)));
				1885	dst.y = As<Float4>(CmpNLT(As<Int4>(src0.y), As<Int4>(src1.y)));
				1886	dst.z = As<Float4>(CmpNLT(As<Int4>(src0.z), As<Int4>(src1.z)));
				1887	dst.w = As<Float4>(CmpNLT(As<Int4>(src0.w), As<Int4>(src1.w)));
				1888	break;
				1889	case Shader::CONTROL_LT:
				1890	dst.x = As<Float4>(CmpLT(As<Int4>(src0.x), As<Int4>(src1.x)));
				1891	dst.y = As<Float4>(CmpLT(As<Int4>(src0.y), As<Int4>(src1.y)));
				1892	dst.z = As<Float4>(CmpLT(As<Int4>(src0.z), As<Int4>(src1.z)));
				1893	dst.w = As<Float4>(CmpLT(As<Int4>(src0.w), As<Int4>(src1.w)));
				1894	break;
				1895	case Shader::CONTROL_NE:
				1896	dst.x = As<Float4>(CmpNEQ(As<Int4>(src0.x), As<Int4>(src1.x)));
				1897	dst.y = As<Float4>(CmpNEQ(As<Int4>(src0.y), As<Int4>(src1.y)));
				1898	dst.z = As<Float4>(CmpNEQ(As<Int4>(src0.z), As<Int4>(src1.z)));
				1899	dst.w = As<Float4>(CmpNEQ(As<Int4>(src0.w), As<Int4>(src1.w)));
				1900	break;
				1901	case Shader::CONTROL_LE:
				1902	dst.x = As<Float4>(CmpLE(As<Int4>(src0.x), As<Int4>(src1.x)));
				1903	dst.y = As<Float4>(CmpLE(As<Int4>(src0.y), As<Int4>(src1.y)));
				1904	dst.z = As<Float4>(CmpLE(As<Int4>(src0.z), As<Int4>(src1.z)));
				1905	dst.w = As<Float4>(CmpLE(As<Int4>(src0.w), As<Int4>(src1.w)));
				1906	break;
				1907	default:
				1908	ASSERT(false);
				1909	}
				1910	}
				1911
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1912	void ShaderCore::ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control)
				1913	{
				1914	switch(control)
				1915	{
				1916	case Shader::CONTROL_GT:
				1917	dst.x = As<Float4>(CmpNLE(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1918	dst.y = As<Float4>(CmpNLE(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1919	dst.z = As<Float4>(CmpNLE(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1920	dst.w = As<Float4>(CmpNLE(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1921	break;
				1922	case Shader::CONTROL_EQ:
				1923	dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1924	dst.y = As<Float4>(CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1925	dst.z = As<Float4>(CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1926	dst.w = As<Float4>(CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1927	break;
				1928	case Shader::CONTROL_GE:
				1929	dst.x = As<Float4>(CmpNLT(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1930	dst.y = As<Float4>(CmpNLT(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1931	dst.z = As<Float4>(CmpNLT(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1932	dst.w = As<Float4>(CmpNLT(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1933	break;
				1934	case Shader::CONTROL_LT:
				1935	dst.x = As<Float4>(CmpLT(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1936	dst.y = As<Float4>(CmpLT(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1937	dst.z = As<Float4>(CmpLT(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1938	dst.w = As<Float4>(CmpLT(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1939	break;
				1940	case Shader::CONTROL_NE:
				1941	dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1942	dst.y = As<Float4>(CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1943	dst.z = As<Float4>(CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1944	dst.w = As<Float4>(CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1945	break;
				1946	case Shader::CONTROL_LE:
				1947	dst.x = As<Float4>(CmpLE(As<UInt4>(src0.x), As<UInt4>(src1.x)));
				1948	dst.y = As<Float4>(CmpLE(As<UInt4>(src0.y), As<UInt4>(src1.y)));
				1949	dst.z = As<Float4>(CmpLE(As<UInt4>(src0.z), As<UInt4>(src1.z)));
				1950	dst.w = As<Float4>(CmpLE(As<UInt4>(src0.w), As<UInt4>(src1.w)));
				1951	break;
				1952	default:
				1953	ASSERT(false);
				1954	}
				1955	}
				1956
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1957	void ShaderCore::all(Float4 &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1958	{
				1959	dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w));
				1960	}
				1961
Alexis Hetu	ecad519	2015-06-05 13:42:05 -0400	[diff] [blame]	1962	void ShaderCore::any(Float4 &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1963	{
					// Folds the four components of src together with bitwise OR (each one
					// viewed through As<Int4>) and writes the single result to dst via
					// As<Float4>.
					// NOTE(review): presumably the components hold all-ones / all-zeros
					// comparison masks, so the result is set where at least one of x, y, z
					// or w is set - confirm against callers.
				1964	dst = As<Float4>(As<Int4>(src.x) \| As<Int4>(src.y) \| As<Int4>(src.z) \| As<Int4>(src.w));
				1965	}
				1966
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1967	void ShaderCore::bitwise_not(Vector4f &dst, const Vector4f &src)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1968	{
					// Component-wise complement: each component of src is viewed as Int4,
					// XORed with Int4(0xFFFFFFFF) and stored back into the matching
					// component of dst as Float4. XOR against an all-ones constant flips
					// every bit (assuming 32-bit lanes - TODO confirm).
				1969	dst.x = As<Float4>(As<Int4>(src.x) ^ Int4(0xFFFFFFFF));
				1970	dst.y = As<Float4>(As<Int4>(src.y) ^ Int4(0xFFFFFFFF));
				1971	dst.z = As<Float4>(As<Int4>(src.z) ^ Int4(0xFFFFFFFF));
				1972	dst.w = As<Float4>(As<Int4>(src.w) ^ Int4(0xFFFFFFFF));
				1973	}
				1974
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1975	void ShaderCore::bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1976	{
					// Component-wise bitwise OR: for each of x, y, z, w the matching
					// components of src0 and src1 are viewed as Int4, ORed, and the result
					// is stored in the same component of dst as Float4.
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1977	dst.x = As<Float4>(As<Int4>(src0.x) \| As<Int4>(src1.x));
				1978	dst.y = As<Float4>(As<Int4>(src0.y) \| As<Int4>(src1.y));
				1979	dst.z = As<Float4>(As<Int4>(src0.z) \| As<Int4>(src1.z));
				1980	dst.w = As<Float4>(As<Int4>(src0.w) \| As<Int4>(src1.w));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1981	}
				1982
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1983	void ShaderCore::bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1984	{
					// Component-wise bitwise XOR: for each of x, y, z, w the matching
					// components of src0 and src1 are viewed as Int4, XORed, and the result
					// is stored in the same component of dst as Float4.
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1985	dst.x = As<Float4>(As<Int4>(src0.x) ^ As<Int4>(src1.x));
				1986	dst.y = As<Float4>(As<Int4>(src0.y) ^ As<Int4>(src1.y));
				1987	dst.z = As<Float4>(As<Int4>(src0.z) ^ As<Int4>(src1.z));
				1988	dst.w = As<Float4>(As<Int4>(src0.w) ^ As<Int4>(src1.w));
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1989	}
				1990
Alexis Hetu	24f454e	2016-08-31 17:22:13 -0400	[diff] [blame]	1991	void ShaderCore::bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	1992	{
					// Component-wise bitwise AND: for each of x, y, z, w the matching
					// components of src0 and src1 are viewed as Int4, ANDed, and the result
					// is stored in the same component of dst as Float4.
Alexis Hetu	c4f2c29	2015-08-18 15:43:09 -0400	[diff] [blame]	1993	dst.x = As<Float4>(As<Int4>(src0.x) & As<Int4>(src1.x));
				1994	dst.y = As<Float4>(As<Int4>(src0.y) & As<Int4>(src1.y));
				1995	dst.z = As<Float4>(As<Int4>(src0.z) & As<Int4>(src1.z));
				1996	dst.w = As<Float4>(As<Int4>(src0.w) & As<Int4>(src1.w));
				1997	}
				1998
				1999	void ShaderCore::equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				2000	{
					// Whole-vector equality: compares the x, y, z and w components of src0
					// and src1 with CmpEQ, ANDs the four comparison results together, and
					// writes that one combined value to every component of dst.
					// NOTE(review): the operands are viewed as UInt4 rather than compared
					// as floats, so this looks like a bit-pattern comparison - confirm
					// that this is intended for floating-point inputs.
				2001	dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) &
				2002	CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) &
				2003	CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) &
				2004	CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
					// Replicate the combined result into the remaining components.
				2005	dst.y = dst.x;
				2006	dst.z = dst.x;
				2007	dst.w = dst.x;
				2008	}
				2009
				2010	void ShaderCore::notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
				2011	{
					// Whole-vector inequality: compares the x, y, z and w components of
					// src0 and src1 with CmpNEQ, ORs the four comparison results together,
					// and writes that one combined value to every component of dst.
					// NOTE(review): the operands are viewed as UInt4 rather than compared
					// as floats, so this looks like a bit-pattern comparison - confirm
					// that this is intended for floating-point inputs.
				2012	dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) \|
				2013	CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) \|
				2014	CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) \|
				2015	CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w)));
					// Replicate the combined result into the remaining components.
				2016	dst.y = dst.x;
				2017	dst.z = dst.x;
				2018	dst.w = dst.x;
John Bauman	19bac1e	2014-05-06 15:23:49 -0400	[diff] [blame]	2019	}
John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	2020	}