blob: 146e42d8d0a26c564027af190c593180a668b26a [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "PixelRoutine.hpp"
16
John Bauman89401822014-05-06 15:04:28 -040017#include "SamplerCore.hpp"
18#include "Constants.hpp"
Nicolas Capens708c24b2017-10-26 13:07:10 -040019#include "Renderer/Renderer.hpp"
20#include "Renderer/QuadRasterizer.hpp"
21#include "Renderer/Surface.hpp"
22#include "Renderer/Primitive.hpp"
23#include "Common/Debug.hpp"
John Bauman89401822014-05-06 15:04:28 -040024
John Bauman89401822014-05-06 15:04:28 -040025namespace sw
26{
// Global configuration switches; declared here, defined outside this file.
extern bool complementaryDepthBuffer;   // When set, shader depth is stored as (1 - depth) and depth comparisons are mirrored.
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern bool forceClearRegisters;        // When set, the varying registers are always zero-initialized.
John Bauman89401822014-05-06 15:04:28 -040031
Nicolas Capens5bff4052018-05-28 13:18:59 -040032 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader)
33 : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput)
John Bauman89401822014-05-06 15:04:28 -040034 {
Alexis Hetu53ad4af2017-12-06 14:49:07 -050035 if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040036 {
Nicolas Capens3b4c93f2016-05-18 12:51:37 -040037 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++)
Alexis Hetuf2a8c372015-07-13 11:08:41 -040038 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040039 v[i].x = Float4(0.0f);
40 v[i].y = Float4(0.0f);
41 v[i].z = Float4(0.0f);
42 v[i].w = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040043 }
John Bauman89401822014-05-06 15:04:28 -040044 }
45 }
46
47 PixelRoutine::~PixelRoutine()
48 {
John Bauman89401822014-05-06 15:04:28 -040049 }
50
Nicolas Capens4f172c72016-01-13 08:34:30 -050051 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040052 {
53 #if PERF_PROFILE
54 Long pipeTime = Ticks();
55 #endif
56
John Bauman89401822014-05-06 15:04:28 -040057 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040058
59 Int zMask[4]; // Depth mask
60 Int sMask[4]; // Stencil mask
61
62 for(unsigned int q = 0; q < state.multiSample; q++)
63 {
64 zMask[q] = cMask[q];
65 sMask[q] = cMask[q];
66 }
67
68 for(unsigned int q = 0; q < state.multiSample; q++)
69 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050070 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -040071 }
72
73 Float4 f;
John Bauman89401822014-05-06 15:04:28 -040074 Float4 rhwCentroid;
75
Nicolas Capens4f172c72016-01-13 08:34:30 -050076 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040077
John Bauman19bac1e2014-05-06 15:23:49 -040078 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040079 {
80 for(unsigned int q = 0; q < state.multiSample; q++)
81 {
82 Float4 x = xxxx;
Nicolas Capens4f172c72016-01-13 08:34:30 -050083
John Bauman89401822014-05-06 15:04:28 -040084 if(state.multiSample > 1)
85 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050086 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
John Bauman89401822014-05-06 15:04:28 -040087 }
88
Nicolas Capens5ba372f2017-10-05 16:05:47 -040089 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
John Bauman89401822014-05-06 15:04:28 -040090 }
91 }
92
93 Bool depthPass = false;
94
95 if(earlyDepthTest)
96 {
97 for(unsigned int q = 0; q < state.multiSample; q++)
98 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050099 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400100 }
101 }
102
103 If(depthPass || Bool(!earlyDepthTest))
104 {
105 #if PERF_PROFILE
106 Long interpTime = Ticks();
107 #endif
108
Nicolas Capens4f172c72016-01-13 08:34:30 -0500109 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400110
John Bauman89401822014-05-06 15:04:28 -0400111 // Centroid locations
112 Float4 XXXX = Float4(0.0f);
113 Float4 YYYY = Float4(0.0f);
114
115 if(state.centroid)
116 {
117 Float4 WWWW(1.0e-9f);
118
119 for(unsigned int q = 0; q < state.multiSample; q++)
120 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500121 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
122 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
123 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400124 }
125
126 WWWW = Rcp_pp(WWWW);
127 XXXX *= WWWW;
128 YYYY *= WWWW;
129
130 XXXX += xxxx;
131 YYYY += yyyy;
132 }
133
John Bauman19bac1e2014-05-06 15:23:49 -0400134 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400135 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400136 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500137 rhw = reciprocal(w, false, false, true);
John Bauman89401822014-05-06 15:04:28 -0400138
139 if(state.centroid)
140 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500141 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
John Bauman89401822014-05-06 15:04:28 -0400142 }
143 }
144
Nicolas Capens3b4c93f2016-05-18 12:51:37 -0400145 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
John Bauman89401822014-05-06 15:04:28 -0400146 {
147 for(int component = 0; component < 4; component++)
148 {
John Bauman89401822014-05-06 15:04:28 -0400149 if(state.interpolant[interpolant].component & (1 << component))
150 {
151 if(!state.interpolant[interpolant].centroid)
152 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400153 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false);
John Bauman89401822014-05-06 15:04:28 -0400154 }
155 else
156 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500157 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400158 }
159 }
160 }
161
162 Float4 rcp;
163
164 switch(state.interpolant[interpolant].project)
165 {
166 case 0:
167 break;
168 case 1:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500169 rcp = reciprocal(v[interpolant].y);
170 v[interpolant].x = v[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400171 break;
172 case 2:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500173 rcp = reciprocal(v[interpolant].z);
174 v[interpolant].x = v[interpolant].x * rcp;
175 v[interpolant].y = v[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400176 break;
177 case 3:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500178 rcp = reciprocal(v[interpolant].w);
179 v[interpolant].x = v[interpolant].x * rcp;
180 v[interpolant].y = v[interpolant].y * rcp;
181 v[interpolant].z = v[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400182 break;
183 }
184 }
185
186 if(state.fog.component)
187 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400188 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false);
John Bauman89401822014-05-06 15:04:28 -0400189 }
190
Nicolas Capens4f172c72016-01-13 08:34:30 -0500191 setBuiltins(x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400192
193 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500194 cycles[PERF_INTERP] += Ticks() - interpTime;
John Bauman89401822014-05-06 15:04:28 -0400195 #endif
196
197 Bool alphaPass = true;
198
199 if(colorUsed())
200 {
201 #if PERF_PROFILE
202 Long shaderTime = Ticks();
203 #endif
204
Nicolas Capens4f172c72016-01-13 08:34:30 -0500205 applyShader(cMask);
John Bauman89401822014-05-06 15:04:28 -0400206
207 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500208 cycles[PERF_SHADER] += Ticks() - shaderTime;
John Bauman89401822014-05-06 15:04:28 -0400209 #endif
210
Nicolas Capens4f172c72016-01-13 08:34:30 -0500211 alphaPass = alphaTest(cMask);
John Bauman89401822014-05-06 15:04:28 -0400212
John Bauman19bac1e2014-05-06 15:23:49 -0400213 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400214 {
215 for(unsigned int q = 0; q < state.multiSample; q++)
216 {
217 zMask[q] &= cMask[q];
218 sMask[q] &= cMask[q];
219 }
220 }
221 }
222
223 If(alphaPass)
224 {
225 if(!earlyDepthTest)
226 {
227 for(unsigned int q = 0; q < state.multiSample; q++)
228 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500229 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400230 }
231 }
232
233 #if PERF_PROFILE
234 Long ropTime = Ticks();
235 #endif
236
237 If(depthPass || Bool(earlyDepthTest))
238 {
239 for(unsigned int q = 0; q < state.multiSample; q++)
240 {
241 if(state.multiSampleMask & (1 << q))
242 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500243 writeDepth(zBuffer, q, x, z[q], zMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400244
245 if(state.occlusionEnabled)
246 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500247 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
John Bauman89401822014-05-06 15:04:28 -0400248 }
249 }
250 }
251
252 if(colorUsed())
253 {
254 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400255 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400256 #endif
257
Nicolas Capens4f172c72016-01-13 08:34:30 -0500258 rasterOperation(f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400259 }
260 }
261
262 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500263 cycles[PERF_ROP] += Ticks() - ropTime;
John Bauman89401822014-05-06 15:04:28 -0400264 #endif
265 }
266 }
267
268 for(unsigned int q = 0; q < state.multiSample; q++)
269 {
270 if(state.multiSampleMask & (1 << q))
271 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500272 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400273 }
274 }
275
276 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500277 cycles[PERF_PIPE] += Ticks() - pipeTime;
John Bauman89401822014-05-06 15:04:28 -0400278 #endif
279 }
280
John Bauman89401822014-05-06 15:04:28 -0400281 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
282 {
283 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
284
285 if(!flat)
286 {
287 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
288 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
289
290 if(perspective)
291 {
292 interpolant *= rhw;
293 }
294 }
295
296 return interpolant;
297 }
298
Nicolas Capens4f172c72016-01-13 08:34:30 -0500299 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400300 {
301 if(!state.stencilActive)
302 {
303 return;
304 }
305
306 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
307
308 Pointer<Byte> buffer = sBuffer + 2 * x;
309
310 if(q > 0)
311 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500312 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400313 }
314
Nicolas Capens48ef1252016-11-07 15:30:33 -0500315 Byte8 value = *Pointer<Byte8>(buffer);
John Bauman89401822014-05-06 15:04:28 -0400316 Byte8 valueCCW = value;
317
318 if(!state.noStencilMask)
319 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500320 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400321 }
322
Nicolas Capens4f172c72016-01-13 08:34:30 -0500323 stencilTest(value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400324
325 if(state.twoSidedStencil)
326 {
327 if(!state.noStencilMaskCCW)
328 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500329 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400330 }
331
Nicolas Capens4f172c72016-01-13 08:34:30 -0500332 stencilTest(valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400333
Nicolas Capens4f172c72016-01-13 08:34:30 -0500334 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
335 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400336 value |= valueCCW;
337 }
338
339 sMask = SignMask(value) & cMask;
340 }
341
Nicolas Capens4f172c72016-01-13 08:34:30 -0500342 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400343 {
344 Byte8 equal;
345
346 switch(stencilCompareMode)
347 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400348 case STENCIL_ALWAYS:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400349 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400350 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400351 case STENCIL_NEVER:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400352 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400353 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400354 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400355 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500356 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400357 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400358 case STENCIL_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500359 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400360 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400361 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500362 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400363 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400364 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400365 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400366 equal = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500367 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400368 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500369 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400370 value |= equal;
371 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400372 case STENCIL_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500373 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
John Bauman89401822014-05-06 15:04:28 -0400374 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
375 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
376 value = equal;
377 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400378 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400379 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500380 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400381 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400382 break;
383 default:
384 ASSERT(false);
385 }
386 }
387
Nicolas Capens4f172c72016-01-13 08:34:30 -0500388 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400389 {
390 if(!state.depthTestActive)
391 {
392 return true;
393 }
394
395 Float4 Z = z;
396
John Bauman19bac1e2014-05-06 15:23:49 -0400397 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400398 {
399 if(complementaryDepthBuffer)
400 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500401 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400402 }
403 else
404 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500405 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400406 }
407 }
408
409 Pointer<Byte> buffer;
410 Int pitch;
411
412 if(!state.quadLayoutDepthBuffer)
413 {
414 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500415 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400416 }
417 else
418 {
419 buffer = zBuffer + 8 * x;
420 }
421
422 if(q > 0)
423 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500424 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400425 }
426
427 Float4 zValue;
428
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400429 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400430 {
431 if(!state.quadLayoutDepthBuffer)
432 {
433 // FIXME: Properly optimizes?
434 zValue.xy = *Pointer<Float4>(buffer);
435 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
436 }
437 else
438 {
439 zValue = *Pointer<Float4>(buffer, 16);
440 }
441 }
442
443 Int4 zTest;
444
445 switch(state.depthCompareMode)
446 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400447 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400448 // Optimized
449 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400450 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400451 // Optimized
452 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400453 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400454 zTest = CmpEQ(zValue, Z);
455 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400456 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400457 zTest = CmpNEQ(zValue, Z);
458 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400459 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400460 if(complementaryDepthBuffer)
461 {
462 zTest = CmpLT(zValue, Z);
463 }
464 else
465 {
466 zTest = CmpNLE(zValue, Z);
467 }
468 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400469 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400470 if(complementaryDepthBuffer)
471 {
472 zTest = CmpNLT(zValue, Z);
473 }
474 else
475 {
476 zTest = CmpLE(zValue, Z);
477 }
478 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400479 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400480 if(complementaryDepthBuffer)
481 {
482 zTest = CmpLE(zValue, Z);
483 }
484 else
485 {
486 zTest = CmpNLT(zValue, Z);
487 }
488 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400489 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400490 if(complementaryDepthBuffer)
491 {
492 zTest = CmpNLE(zValue, Z);
493 }
494 else
495 {
496 zTest = CmpLT(zValue, Z);
497 }
498 break;
499 default:
500 ASSERT(false);
501 }
502
503 switch(state.depthCompareMode)
504 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400505 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400506 zMask = cMask;
507 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400508 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400509 zMask = 0x0;
510 break;
511 default:
512 zMask = SignMask(zTest) & cMask;
513 break;
514 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500515
John Bauman89401822014-05-06 15:04:28 -0400516 if(state.stencilActive)
517 {
518 zMask &= sMask;
519 }
520
521 return zMask != 0;
522 }
523
Nicolas Capens4f172c72016-01-13 08:34:30 -0500524 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400525 {
526 Short4 cmp;
527 Short4 equal;
528
529 switch(state.alphaCompareMode)
530 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400531 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400532 aMask = 0xF;
533 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400534 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400535 aMask = 0x0;
536 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400537 case ALPHA_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500538 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400539 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400540 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400541 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
542 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400543 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400544 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400545 case ALPHA_LESS: // a < b ~ b > a
Nicolas Capens4f172c72016-01-13 08:34:30 -0500546 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
Nicolas Capens33438a62017-09-27 11:47:35 -0400547 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400548 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400549 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
Nicolas Capens4f172c72016-01-13 08:34:30 -0500550 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
551 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400552 cmp |= equal;
Nicolas Capens33438a62017-09-27 11:47:35 -0400553 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400554 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400555 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
556 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400557 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400558 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400559 case ALPHA_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500560 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400561 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400562 break;
563 default:
564 ASSERT(false);
565 }
566 }
567
Nicolas Capens4f172c72016-01-13 08:34:30 -0500568 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400569 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500570 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
571 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
572 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
573 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
John Bauman89401822014-05-06 15:04:28 -0400574
575 Int aMask0 = SignMask(coverage0);
576 Int aMask1 = SignMask(coverage1);
577 Int aMask2 = SignMask(coverage2);
578 Int aMask3 = SignMask(coverage3);
579
580 cMask[0] &= aMask0;
581 cMask[1] &= aMask1;
582 cMask[2] &= aMask2;
583 cMask[3] &= aMask3;
584 }
585
Nicolas Capens4f172c72016-01-13 08:34:30 -0500586 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog)
John Bauman89401822014-05-06 15:04:28 -0400587 {
588 if(!state.fogActive)
589 {
590 return;
591 }
592
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400593 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400594 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500595 pixelFog(fog);
John Bauman89401822014-05-06 15:04:28 -0400596
John Bauman19bac1e2014-05-06 15:23:49 -0400597 fog = Min(fog, Float4(1.0f));
598 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400599 }
600
Nicolas Capens4f172c72016-01-13 08:34:30 -0500601 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
602 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
603 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400604
John Bauman19bac1e2014-05-06 15:23:49 -0400605 c0.x *= fog;
606 c0.y *= fog;
607 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400608
Nicolas Capens4f172c72016-01-13 08:34:30 -0500609 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
610 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
611 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400612 }
613
Nicolas Capens4f172c72016-01-13 08:34:30 -0500614 void PixelRoutine::pixelFog(Float4 &visibility)
John Bauman89401822014-05-06 15:04:28 -0400615 {
616 Float4 &zw = visibility;
617
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400618 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400619 {
620 if(state.wBasedFog)
621 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500622 zw = rhw;
John Bauman89401822014-05-06 15:04:28 -0400623 }
624 else
625 {
626 if(complementaryDepthBuffer)
627 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500628 zw = Float4(1.0f) - z[0];
John Bauman89401822014-05-06 15:04:28 -0400629 }
630 else
631 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500632 zw = z[0];
John Bauman89401822014-05-06 15:04:28 -0400633 }
634 }
635 }
636
637 switch(state.pixelFogMode)
638 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400639 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400640 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400641 case FOG_LINEAR:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500642 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale));
643 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
John Bauman89401822014-05-06 15:04:28 -0400644 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400645 case FOG_EXP:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500646 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400647 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400648 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400649 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400650 zw *= zw;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500651 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400652 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400653 break;
654 default:
655 ASSERT(false);
656 }
657 }
658
Nicolas Capens4f172c72016-01-13 08:34:30 -0500659 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
John Bauman89401822014-05-06 15:04:28 -0400660 {
661 if(!state.depthWriteEnable)
662 {
663 return;
664 }
665
666 Float4 Z = z;
667
John Bauman19bac1e2014-05-06 15:23:49 -0400668 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400669 {
670 if(complementaryDepthBuffer)
671 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500672 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400673 }
674 else
675 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500676 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400677 }
678 }
679
680 Pointer<Byte> buffer;
681 Int pitch;
682
683 if(!state.quadLayoutDepthBuffer)
Nicolas Capens05b3d662016-02-25 23:58:33 -0500684 {
John Bauman89401822014-05-06 15:04:28 -0400685 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500686 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400687 }
688 else
Nicolas Capens05b3d662016-02-25 23:58:33 -0500689 {
John Bauman89401822014-05-06 15:04:28 -0400690 buffer = zBuffer + 8 * x;
691 }
692
693 if(q > 0)
694 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500695 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400696 }
697
698 Float4 zValue;
699
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400700 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400701 {
702 if(!state.quadLayoutDepthBuffer)
703 {
704 // FIXME: Properly optimizes?
705 zValue.xy = *Pointer<Float4>(buffer);
706 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
707 }
708 else
709 {
710 zValue = *Pointer<Float4>(buffer, 16);
711 }
712 }
713
Nicolas Capens4f172c72016-01-13 08:34:30 -0500714 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
715 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -0400716 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
717
718 if(!state.quadLayoutDepthBuffer)
719 {
720 // FIXME: Properly optimizes?
721 *Pointer<Float2>(buffer) = Float2(Z.xy);
722 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
723 }
724 else
725 {
726 *Pointer<Float4>(buffer, 16) = Z;
727 }
728 }
729
Nicolas Capens4f172c72016-01-13 08:34:30 -0500730 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400731 {
732 if(!state.stencilActive)
733 {
734 return;
735 }
736
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400737 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400738 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400739 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400740 {
741 return;
742 }
743 }
744
745 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
746 {
747 return;
748 }
749
750 Pointer<Byte> buffer = sBuffer + 2 * x;
751
752 if(q > 0)
753 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500754 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400755 }
756
Nicolas Capens48ef1252016-11-07 15:30:33 -0500757 Byte8 bufferValue = *Pointer<Byte8>(buffer);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500758
John Bauman89401822014-05-06 15:04:28 -0400759 Byte8 newValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500760 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400761
762 if(!state.noStencilWriteMask)
763 {
764 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500765 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
766 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400767 newValue |= maskedValue;
768 }
769
770 if(state.twoSidedStencil)
771 {
772 Byte8 newValueCCW;
773
Nicolas Capens4f172c72016-01-13 08:34:30 -0500774 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400775
776 if(!state.noStencilWriteMaskCCW)
777 {
778 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500779 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
780 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400781 newValueCCW |= maskedValue;
782 }
783
Nicolas Capens4f172c72016-01-13 08:34:30 -0500784 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
785 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400786 newValue |= newValueCCW;
787 }
788
Nicolas Capens4f172c72016-01-13 08:34:30 -0500789 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
790 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
John Bauman89401822014-05-06 15:04:28 -0400791 newValue |= bufferValue;
792
Nicolas Capens16b5f152016-10-13 13:39:01 -0400793 *Pointer<Byte4>(buffer) = Byte4(newValue);
John Bauman89401822014-05-06 15:04:28 -0400794 }
795
Nicolas Capens4f172c72016-01-13 08:34:30 -0500796 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400797 {
798 Byte8 &pass = newValue;
799 Byte8 fail;
800 Byte8 zFail;
801
Nicolas Capens4f172c72016-01-13 08:34:30 -0500802 stencilOperation(pass, bufferValue, stencilPassOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400803
804 if(stencilZFailOperation != stencilPassOperation)
805 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500806 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400807 }
808
809 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
810 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500811 stencilOperation(fail, bufferValue, stencilFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400812 }
813
814 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
815 {
816 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
817 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500818 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
819 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
John Bauman89401822014-05-06 15:04:28 -0400820 pass |= zFail;
821 }
822
Nicolas Capens4f172c72016-01-13 08:34:30 -0500823 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
824 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
John Bauman89401822014-05-06 15:04:28 -0400825 pass |= fail;
826 }
827 }
828
Nicolas Capens4f172c72016-01-13 08:34:30 -0500829 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400830 {
831 switch(operation)
832 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400833 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400834 output = bufferValue;
835 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400836 case OPERATION_ZERO:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400837 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400838 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400839 case OPERATION_REPLACE:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500840 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
John Bauman89401822014-05-06 15:04:28 -0400841 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400842 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400843 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
844 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400845 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400846 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
847 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400848 case OPERATION_INVERT:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400849 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400850 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400851 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400852 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
853 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400854 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400855 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
856 break;
857 default:
858 ASSERT(false);
859 }
860 }
861
Nicolas Capens96d4e092016-11-18 14:22:38 -0500862 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400863 {
864 switch(blendFactorActive)
865 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400866 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400867 // Optimized
868 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400869 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400870 // Optimized
871 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400872 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400873 blendFactor.x = current.x;
874 blendFactor.y = current.y;
875 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400876 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400877 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400878 blendFactor.x = Short4(0xFFFFu) - current.x;
879 blendFactor.y = Short4(0xFFFFu) - current.y;
880 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400881 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400882 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400883 blendFactor.x = pixel.x;
884 blendFactor.y = pixel.y;
885 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400886 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400887 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400888 blendFactor.x = Short4(0xFFFFu) - pixel.x;
889 blendFactor.y = Short4(0xFFFFu) - pixel.y;
890 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400891 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400892 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400893 blendFactor.x = current.w;
894 blendFactor.y = current.w;
895 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400896 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400897 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400898 blendFactor.x = Short4(0xFFFFu) - current.w;
899 blendFactor.y = Short4(0xFFFFu) - current.w;
900 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400901 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400902 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400903 blendFactor.x = pixel.w;
904 blendFactor.y = pixel.w;
905 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400906 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400907 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400908 blendFactor.x = Short4(0xFFFFu) - pixel.w;
909 blendFactor.y = Short4(0xFFFFu) - pixel.w;
910 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400911 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400912 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400913 blendFactor.x = Short4(0xFFFFu) - pixel.w;
914 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
915 blendFactor.y = blendFactor.x;
916 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400917 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400918 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500919 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
920 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
921 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400922 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400923 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500924 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
925 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
926 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400927 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400928 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500929 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
930 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
931 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400932 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400933 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500934 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
935 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
936 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400937 break;
938 default:
939 ASSERT(false);
940 }
941 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500942
Nicolas Capens96d4e092016-11-18 14:22:38 -0500943 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400944 {
945 switch(blendFactorAlphaActive)
946 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400947 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400948 // Optimized
949 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400950 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400951 // Optimized
952 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400953 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400954 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400955 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400956 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400957 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400958 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400959 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400960 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400961 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400962 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400963 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400964 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400965 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400966 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400967 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400968 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400969 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400970 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400971 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400972 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400973 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400974 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400975 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400976 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400977 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400978 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400979 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400980 case BLEND_CONSTANT:
981 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500982 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400983 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400984 case BLEND_INVCONSTANT:
985 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500986 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400987 break;
988 default:
989 ASSERT(false);
990 }
991 }
992
Alexis Hetu049a1872016-04-25 16:59:58 -0400993 bool PixelRoutine::isSRGB(int index) const
994 {
Nicolas Capens8f7739a2017-12-16 02:06:56 -0500995 return Surface::isSRGBformat(state.targetFormat[index]);
Alexis Hetu049a1872016-04-25 16:59:58 -0400996 }
997
Nicolas Capens4f172c72016-01-13 08:34:30 -0500998 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -0400999 {
John Bauman89401822014-05-06 15:04:28 -04001000 Short4 c01;
1001 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001002 Pointer<Byte> buffer;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001003 Pointer<Byte> buffer2;
John Bauman89401822014-05-06 15:04:28 -04001004
John Bauman89401822014-05-06 15:04:28 -04001005 switch(state.targetFormat[index])
1006 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001007 case FORMAT_R5G6B5:
1008 buffer = cBuffer + 2 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001009 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capensb40a2562016-01-05 00:08:45 -05001010 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001011
1012 pixel.x = c01 & Short4(0xF800u);
1013 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1014 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1015 pixel.w = Short4(0xFFFFu);
1016 break;
John Bauman89401822014-05-06 15:04:28 -04001017 case FORMAT_A8R8G8B8:
1018 buffer = cBuffer + 4 * x;
1019 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001020 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001021 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001022 pixel.z = c01;
1023 pixel.y = c01;
1024 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1025 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1026 pixel.x = pixel.z;
1027 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1028 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1029 pixel.y = pixel.z;
1030 pixel.w = pixel.x;
1031 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1032 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1033 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1034 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001035 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001036 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001037 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001038 buffer = cBuffer + 4 * x;
1039 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001040 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001041 c23 = *Pointer<Short4>(buffer);
1042 pixel.z = c01;
1043 pixel.y = c01;
1044 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1045 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1046 pixel.x = pixel.z;
1047 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1048 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1049 pixel.y = pixel.z;
1050 pixel.w = pixel.x;
1051 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1052 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1053 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1054 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1055 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001056 case FORMAT_A8:
1057 buffer = cBuffer + 1 * x;
1058 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001059 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001060 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1061 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1062 pixel.x = Short4(0x0000);
1063 pixel.y = Short4(0x0000);
1064 pixel.z = Short4(0x0000);
1065 break;
Nicolas Capens7a473b72017-10-25 17:18:55 -04001066 case FORMAT_R8:
1067 buffer = cBuffer + 1 * x;
1068 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
1069 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1070 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1071 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1072 pixel.y = Short4(0x0000);
1073 pixel.z = Short4(0x0000);
1074 pixel.w = Short4(0xFFFFu);
1075 break;
John Bauman89401822014-05-06 15:04:28 -04001076 case FORMAT_X8R8G8B8:
1077 buffer = cBuffer + 4 * x;
1078 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001079 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001080 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001081 pixel.z = c01;
1082 pixel.y = c01;
1083 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1084 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1085 pixel.x = pixel.z;
1086 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1087 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1088 pixel.y = pixel.z;
1089 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1090 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1091 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1092 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001093 break;
Nicolas Capense4bdbc32017-12-07 20:46:49 -05001094 case FORMAT_G8R8:
1095 buffer = cBuffer + 2 * x;
1096 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
1097 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1098 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1099 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
1100 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1101 pixel.z = Short4(0x0000u);
1102 pixel.w = Short4(0xFFFFu);
1103 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001104 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001105 case FORMAT_SRGB8_X8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001106 buffer = cBuffer + 4 * x;
1107 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001108 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001109 c23 = *Pointer<Short4>(buffer);
1110 pixel.z = c01;
1111 pixel.y = c01;
1112 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1113 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1114 pixel.x = pixel.z;
1115 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1116 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1117 pixel.y = pixel.z;
1118 pixel.w = pixel.x;
1119 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1120 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1121 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1122 pixel.w = Short4(0xFFFFu);
1123 break;
John Bauman89401822014-05-06 15:04:28 -04001124 case FORMAT_A8G8R8B8Q:
1125 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001126 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1127 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1128 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1129 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001130 break;
1131 case FORMAT_X8G8R8B8Q:
1132 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001133 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1134 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1135 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1136 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001137 break;
1138 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001139 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001140 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1141 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001142 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001143 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1144 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1145 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001146 break;
1147 case FORMAT_G16R16:
1148 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001149 pixel.x = *Pointer<Short4>(buffer + 4 * x);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001150 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Maxime Grégoired9762742015-07-08 16:43:48 -04001151 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001152 pixel.z = pixel.x;
1153 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1154 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1155 pixel.y = pixel.z;
1156 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1157 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1158 pixel.z = Short4(0xFFFFu);
1159 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001160 break;
1161 default:
1162 ASSERT(false);
1163 }
1164
Alexis Hetu049a1872016-04-25 16:59:58 -04001165 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001166 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001167 sRGBtoLinear16_12_16(pixel);
John Bauman89401822014-05-06 15:04:28 -04001168 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001169 }
1170
Nicolas Capens4f172c72016-01-13 08:34:30 -05001171 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001172 {
1173 if(!state.alphaBlendActive)
1174 {
1175 return;
1176 }
1177
1178 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001179 readPixel(index, cBuffer, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001180
1181 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001182 Vector4s sourceFactor;
1183 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001184
Nicolas Capens4f172c72016-01-13 08:34:30 -05001185 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1186 blendFactor(destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001187
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001188 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001189 {
John Bauman19bac1e2014-05-06 15:23:49 -04001190 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1191 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1192 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001193 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001194
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001195 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001196 {
John Bauman19bac1e2014-05-06 15:23:49 -04001197 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1198 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1199 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001200 }
1201
1202 switch(state.blendOperation)
1203 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001204 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001205 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1206 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1207 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001208 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001209 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001210 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1211 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1212 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001213 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001214 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001215 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1216 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1217 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001218 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001219 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001220 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1221 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1222 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001223 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001224 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001225 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1226 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1227 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001228 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001229 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001230 // No operation
1231 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001232 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001233 current.x = pixel.x;
1234 current.y = pixel.y;
1235 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001236 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001237 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001238 current.x = Short4(0x0000);
1239 current.y = Short4(0x0000);
1240 current.z = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001241 break;
1242 default:
1243 ASSERT(false);
1244 }
1245
Nicolas Capens4f172c72016-01-13 08:34:30 -05001246 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1247 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001248
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001249 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001250 {
John Bauman19bac1e2014-05-06 15:23:49 -04001251 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001252 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001253
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001254 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001255 {
John Bauman19bac1e2014-05-06 15:23:49 -04001256 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001257 }
1258
1259 switch(state.blendOperationAlpha)
1260 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001261 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001262 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001263 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001264 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001265 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001266 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001267 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001268 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001269 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001270 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001271 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001272 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001273 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001274 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001275 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001276 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001277 // No operation
1278 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001279 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001280 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001281 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001282 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001283 current.w = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001284 break;
1285 default:
1286 ASSERT(false);
1287 }
1288 }
1289
Nicolas Capens4f172c72016-01-13 08:34:30 -05001290 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001291 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001292 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001293 {
1294 return;
1295 }
1296
1297 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001298 readPixel(index, cBuffer, x, pixel);
Maxime Grégoired9762742015-07-08 16:43:48 -04001299
1300 switch(state.logicalOperation)
1301 {
1302 case LOGICALOP_CLEAR:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001303 current.x = UShort4(0);
1304 current.y = UShort4(0);
1305 current.z = UShort4(0);
Maxime Grégoired9762742015-07-08 16:43:48 -04001306 break;
1307 case LOGICALOP_SET:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001308 current.x = UShort4(0xFFFFu);
1309 current.y = UShort4(0xFFFFu);
1310 current.z = UShort4(0xFFFFu);
Maxime Grégoired9762742015-07-08 16:43:48 -04001311 break;
1312 case LOGICALOP_COPY:
1313 ASSERT(false); // Optimized out
1314 break;
1315 case LOGICALOP_COPY_INVERTED:
1316 current.x = ~current.x;
1317 current.y = ~current.y;
1318 current.z = ~current.z;
1319 break;
1320 case LOGICALOP_NOOP:
1321 current.x = pixel.x;
1322 current.y = pixel.y;
1323 current.z = pixel.z;
1324 break;
1325 case LOGICALOP_INVERT:
1326 current.x = ~pixel.x;
1327 current.y = ~pixel.y;
1328 current.z = ~pixel.z;
1329 break;
1330 case LOGICALOP_AND:
1331 current.x = pixel.x & current.x;
1332 current.y = pixel.y & current.y;
1333 current.z = pixel.z & current.z;
1334 break;
1335 case LOGICALOP_NAND:
1336 current.x = ~(pixel.x & current.x);
1337 current.y = ~(pixel.y & current.y);
1338 current.z = ~(pixel.z & current.z);
1339 break;
1340 case LOGICALOP_OR:
1341 current.x = pixel.x | current.x;
1342 current.y = pixel.y | current.y;
1343 current.z = pixel.z | current.z;
1344 break;
1345 case LOGICALOP_NOR:
1346 current.x = ~(pixel.x | current.x);
1347 current.y = ~(pixel.y | current.y);
1348 current.z = ~(pixel.z | current.z);
1349 break;
1350 case LOGICALOP_XOR:
1351 current.x = pixel.x ^ current.x;
1352 current.y = pixel.y ^ current.y;
1353 current.z = pixel.z ^ current.z;
1354 break;
1355 case LOGICALOP_EQUIV:
1356 current.x = ~(pixel.x ^ current.x);
1357 current.y = ~(pixel.y ^ current.y);
1358 current.z = ~(pixel.z ^ current.z);
1359 break;
1360 case LOGICALOP_AND_REVERSE:
1361 current.x = ~pixel.x & current.x;
1362 current.y = ~pixel.y & current.y;
1363 current.z = ~pixel.z & current.z;
1364 break;
1365 case LOGICALOP_AND_INVERTED:
1366 current.x = pixel.x & ~current.x;
1367 current.y = pixel.y & ~current.y;
1368 current.z = pixel.z & ~current.z;
1369 break;
1370 case LOGICALOP_OR_REVERSE:
1371 current.x = ~pixel.x | current.x;
1372 current.y = ~pixel.y | current.y;
1373 current.z = ~pixel.z | current.z;
1374 break;
1375 case LOGICALOP_OR_INVERTED:
1376 current.x = pixel.x | ~current.x;
1377 current.y = pixel.y | ~current.y;
1378 current.z = pixel.z | ~current.z;
1379 break;
1380 default:
1381 ASSERT(false);
1382 }
1383 }
1384
// Stores the fixed-point color held in 'current' to color buffer 'index'.
//
// The work is emitted in this order:
//   1. optional linear -> sRGB conversion;
//   2. optional rounding bias (exactColorRounding) ahead of the truncating shifts;
//   3. conversion of the channel vectors to the memory layout of state.targetFormat[index];
//   4. a read-modify-write of two rows of the target (the second row is
//      colorPitchB[index] bytes after the first), so that channels disabled by the
//      color write mask and pixels disabled by the combined mask keep their old value.
//
// index   - color buffer slot; selects the target format, the color write mask and the pitch.
// cBuffer - address the x offset is applied to (x is scaled by the format's bytes per pixel).
// x       - horizontal pixel position used to address cBuffer.
// current - color to store; it is used as scratch space and is modified in place.
// sMask, zMask, cMask - stencil, depth and coverage masks; whichever combination is
//           active is used to index 8-byte entries of the per-pixel mask tables.
//           NOTE(review): presumably one bit per pixel of a 2x2 quad - confirm against the caller.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001385 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001386 {
// Convert to sRGB either because post-blend conversion is enabled globally and requested
// by the state, or because isSRGB(index) reports true for this target.
Alexis Hetu049a1872016-04-25 16:59:58 -04001387 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001388 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001389 linearToSRGB16_12_16(current);
John Bauman89401822014-05-06 15:04:28 -04001390 }
1391
// The format conversions below truncate with right shifts / bit masks. When exact
// rounding is requested, bias the 16-bit values first so truncation rounds to nearest.
1392 if(exactColorRounding)
1393 {
1394 switch(state.targetFormat[index])
1395 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001396 case FORMAT_R5G6B5:
// Half of the least significant kept bit: 0x0400 for the 5-bit channels (x, z),
// 0x0200 for the 6-bit channel (y). The add saturates instead of wrapping.
Nicolas Capens26f37222015-09-22 09:53:45 -04001397 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1398 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1399 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001400 break;
John Bauman89401822014-05-06 15:04:28 -04001401 case FORMAT_X8G8R8B8Q:
1402 case FORMAT_A8G8R8B8Q:
1403 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001404 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001405 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001406 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001407 case FORMAT_SRGB8_X8:
1408 case FORMAT_SRGB8_A8:
Alexis Hetu143dfc72016-09-13 18:41:27 -04001409 case FORMAT_G8R8:
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001410 case FORMAT_R8:
// 8-bit targets: c - (c >> 8) + 0x80. Subtracting c >> 8 first scales the value down
// so that adding the half-LSB bias 0x80 cannot exceed 0xFFFF (0xFFFF maps to 0xFF80).
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001411 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1412 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1413 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1414 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
John Bauman89401822014-05-06 15:04:28 -04001415 break;
Nicolas Capensb69aa272016-01-02 00:06:41 -05001416 default:
1417 break;
John Bauman89401822014-05-06 15:04:28 -04001418 }
1419 }
1420
// Color write mask. Judging by its uses below, bit 0 = x (R8 tests 0x1), bits 0-1 = x,y
// (G8R8 / G16R16 test 0x3) and bit 3 = w (A8 tests 0x8).
// bgraWriteMask is the same mask with bits 0 and 2 exchanged (bits 1 and 3 unchanged),
// for the formats that are stored in the opposite x/z order.
1421 int rgbaWriteMask = state.colorWriteActive(index);
Nicolas Capens3b396462016-01-02 00:23:53 -05001422 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
John Bauman89401822014-05-06 15:04:28 -04001423
// First pass over the format: rearrange 'current' into the target's memory layout.
// Which component holds the result differs per format; the store switch further down
// reads the matching component(s).
1424 switch(state.targetFormat[index])
1425 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001426 case FORMAT_R5G6B5:
1427 {
// Keep the top 5/6/5 bits of x/y/z and move them to bits 15-11, 10-5 and 4-0.
1428 current.x = current.x & Short4(0xF800u);
1429 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1430 current.z = As<UShort4>(current.z) >> 11;
1431
// The packed 16-bit pixels end up in current.x.
1432 current.x = current.x | current.y | current.z;
1433 }
1434 break;
John Bauman89401822014-05-06 15:04:28 -04001435 case FORMAT_X8G8R8B8Q:
// Not implemented; the commented-out code is kept as a sketch of the intended packing.
1436 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001437 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1438 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1439 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001440
John Bauman19bac1e2014-05-06 15:23:49 -04001441 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1442 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001443 break;
1444 case FORMAT_A8G8R8B8Q:
// Not implemented; see the note on FORMAT_X8G8R8B8Q.
1445 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001446 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1447 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1448 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1449 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001450
John Bauman19bac1e2014-05-06 15:23:49 -04001451 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1452 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001453 break;
1454 case FORMAT_X8R8G8B8:
1455 case FORMAT_A8R8G8B8:
// w is not needed when the target has no alpha channel or when exactly x, y and z are
// written: skip the shift of w and fill its byte slot with a second copy of y instead.
1456 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1457 {
// Reduce each channel to 8 bits.
John Bauman19bac1e2014-05-06 15:23:49 -04001458 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1459 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1460 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001461
// Pack to bytes: z first, then x (the ABGR path below uses the opposite order).
Nicolas Capens33438a62017-09-27 11:47:35 -04001462 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1463 current.y = As<Short4>(PackUnsigned(current.y, current.y));
John Bauman89401822014-05-06 15:04:28 -04001464
// Interleave the packed bytes into whole pixels. The result is left in current.z and
// current.y, which are copied to c01 (first row) and c23 (second row) after this switch.
John Bauman19bac1e2014-05-06 15:23:49 -04001465 current.x = current.z;
1466 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1467 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1468 current.y = current.z;
1469 current.z = As<Short4>(UnpackLow(current.z, current.x));
1470 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001471 }
1472 else
1473 {
// Same sequence, but w is reduced to 8 bits too and packed alongside y.
John Bauman19bac1e2014-05-06 15:23:49 -04001474 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1475 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1476 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1477 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001478
Nicolas Capens33438a62017-09-27 11:47:35 -04001479 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1480 current.y = As<Short4>(PackUnsigned(current.y, current.w));
John Bauman89401822014-05-06 15:04:28 -04001481
John Bauman19bac1e2014-05-06 15:23:49 -04001482 current.x = current.z;
1483 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1484 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1485 current.y = current.z;
1486 current.z = As<Short4>(UnpackLow(current.z, current.x));
1487 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001488 }
1489 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001490 case FORMAT_X8B8G8R8:
1491 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001492 case FORMAT_SRGB8_X8:
1493 case FORMAT_SRGB8_A8:
// Mirror image of the ARGB case above: the only difference is that the first pack
// takes x before z. The w-less variant is again used for the X8 formats and for an
// x,y,z-only write mask.
1494 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7)
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001495 {
1496 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1497 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1498 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1499
Nicolas Capens33438a62017-09-27 11:47:35 -04001500 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1501 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001502
1503 current.x = current.z;
1504 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1505 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1506 current.y = current.z;
1507 current.z = As<Short4>(UnpackLow(current.z, current.x));
1508 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1509 }
1510 else
1511 {
1512 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1513 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1514 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1515 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1516
Nicolas Capens33438a62017-09-27 11:47:35 -04001517 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1518 current.y = As<Short4>(PackUnsigned(current.y, current.w));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001519
1520 current.x = current.z;
1521 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1522 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1523 current.y = current.z;
1524 current.z = As<Short4>(UnpackLow(current.z, current.x));
1525 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1526 }
1527 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001528 case FORMAT_G8R8:
// Reduce x and y to 8 bits, pack each to bytes, then interleave the x and y bytes.
// The packed pixels are left in current.x.
1529 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1530 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001531 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1532 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Alexis Hetu143dfc72016-09-13 18:41:27 -04001533 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1534 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001535 case FORMAT_R8:
// Single 8-bit channel taken from x; the packed bytes stay in current.x.
1536 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001537 current.x = As<Short4>(PackUnsigned(current.x, current.x));
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001538 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001539 case FORMAT_A8:
// Single 8-bit channel taken from w; the packed bytes stay in current.w.
1540 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001541 current.w = As<Short4>(PackUnsigned(current.w, current.w));
John Bauman66b8ab22014-05-06 15:57:45 -04001542 break;
John Bauman89401822014-05-06 15:04:28 -04001543 case FORMAT_G16R16:
// Interleave the 16-bit x and y lanes: the low half goes to current.x (stored to the
// first row below), the high half to current.y (stored to the second row).
John Bauman19bac1e2014-05-06 15:23:49 -04001544 current.z = current.x;
1545 current.x = As<Short4>(UnpackLow(current.x, current.y));
1546 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1547 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001548 break;
1549 case FORMAT_A16B16G16R16:
// Transpose from one vector per channel to one vector per pixel; the store below
// writes current.x/.y to the first row and current.z/.w to the second row.
John Bauman19bac1e2014-05-06 15:23:49 -04001550 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001551 break;
John Bauman89401822014-05-06 15:04:28 -04001552 default:
1553 ASSERT(false);
1554 }
1555
// Packed data for the first (c01) and second (c23) row. Only the 32-bit 8888 store
// paths use these; the R5G6B5 path declares its own Int c01/c23 that shadow them.
John Bauman19bac1e2014-05-06 15:23:49 -04001556 Short4 c01 = current.z;
1557 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001558
1559 Int xMask; // Combination of all masks
1560
// Start from the depth mask when depth testing is active, otherwise from the coverage
// mask, and additionally AND in the stencil mask when stenciling is active.
1561 if(state.depthTestActive)
1562 {
1563 xMask = zMask;
1564 }
1565 else
1566 {
1567 xMask = cMask;
1568 }
1569
1570 if(state.stencilActive)
1571 {
1572 xMask &= sMask;
1573 }
1574
// Second pass over the format: merge the packed color with what is already in the
// buffer and store it. Each path follows the same pattern:
//   new   &= table[selected entry];   old &= complementary table entry;   new |= old
// first for the channel write mask (skipped when every channel is written) and then for
// the per-pixel mask, where xMask * 8 selects an 8-byte table entry.
John Bauman89401822014-05-06 15:04:28 -04001575 switch(state.targetFormat[index])
1576 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001577 case FORMAT_R5G6B5:
1578 {
// 2 bytes per pixel; each 32-bit access covers two adjacent pixels of a row.
1579 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001580 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001581
// First 32 bits of the packed color. This Int c01 shadows the outer Short4 c01.
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001582 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001583
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001584 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001585 {
// Partial channel write: mask565Q[mask] is applied to the new color and
// mask565Q[~mask] (the complementary entry) to the old one.
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001586 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001587 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001588 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001589 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001590 }
1591
// Per-pixel mask for the first row: elements [0..1] of the xMask entry.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001592 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1593 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001594 c01 |= value;
1595 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001596
// Advance to the second row and repeat with the upper 32 bits of the packed color.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001597 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001598 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001599
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001600 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001601
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001602 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001603 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001604 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001605 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001606 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001607 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001608 }
1609
// Per-pixel mask for the second row: elements [2..3] of the same xMask entry.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001610 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1611 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001612 c23 |= value;
1613 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001614 }
1615 break;
John Bauman89401822014-05-06 15:04:28 -04001616 case FORMAT_A8G8R8B8Q:
1617 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
// Not implemented; the commented-out code mirrors the FORMAT_A8R8G8B8 store below but
// addresses both halves at fixed offsets (8 * x + 0 and 8 * x + 8) instead of using the pitch.
1618 UNIMPLEMENTED();
1619 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1620
1621 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1622 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1623 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1624 // {
1625 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001626 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1627 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001628 // c01 |= masked;
1629 // }
1630
Nicolas Capens4f172c72016-01-13 08:34:30 -05001631 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1632 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001633 // c01 |= value;
1634 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1635
1636 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1637
1638 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1639 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1640 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1641 // {
1642 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001643 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1644 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001645 // c23 |= masked;
1646 // }
1647
Nicolas Capens4f172c72016-01-13 08:34:30 -05001648 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1649 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001650 // c23 |= value;
1651 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1652 break;
1653 case FORMAT_A8R8G8B8:
1654 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001655 {
// 4 bytes per pixel; each 8-byte access covers two adjacent pixels of a row.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001656 Pointer<Byte> buffer = cBuffer + x * 4;
1657 Short4 value = *Pointer<Short4>(buffer);
1658
// Channel masking is needed for A8R8G8B8 unless all four channels are written, and for
// X8R8G8B8 unless the mask is 0x7 or 0xF (the two X8R8G8B8 tests reduce to
// "mask is neither 0x7 nor 0xF"). This path indexes the tables with bgraWriteMask.
1659 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1660 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1661 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1662 {
1663 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001664 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1665 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001666 c01 |= masked;
1667 }
1668
// Per-pixel mask, first row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001669 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1670 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001671 c01 |= value;
1672 *Pointer<Short4>(buffer) = c01;
1673
// Second row: same sequence with c23 and the D23 mask tables.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001674 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001675 value = *Pointer<Short4>(buffer);
1676
1677 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1678 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1679 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1680 {
1681 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001682 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1683 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001684 c23 |= masked;
1685 }
1686
Nicolas Capens4f172c72016-01-13 08:34:30 -05001687 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1688 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001689 c23 |= value;
1690 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001691 }
John Bauman89401822014-05-06 15:04:28 -04001692 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001693 case FORMAT_A8B8G8R8:
1694 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Alexis Hetu049a1872016-04-25 16:59:58 -04001695 case FORMAT_SRGB8_X8:
1696 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001697 {
// Same structure as the ARGB store above, but the tables are indexed with
// rgbaWriteMask and the masking decision is computed once for both rows.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001698 Pointer<Byte> buffer = cBuffer + x * 4;
1699 Short4 value = *Pointer<Short4>(buffer);
1700
// True when some channel must keep its old value: alpha formats with a mask other
// than 0xF, or X8 formats with a mask that is neither 0x7 nor 0xF.
Alexis Hetu049a1872016-04-25 16:59:58 -04001701 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) ||
1702 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) &&
1703 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh?
1704
1705 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001706 {
// Note: this Short4 'masked' shadows the bool 'masked' tested by the enclosing if.
1707 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001708 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1709 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001710 c01 |= masked;
1711 }
1712
Nicolas Capens4f172c72016-01-13 08:34:30 -05001713 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1714 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001715 c01 |= value;
1716 *Pointer<Short4>(buffer) = c01;
1717
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001718 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001719 value = *Pointer<Short4>(buffer);
1720
Alexis Hetu049a1872016-04-25 16:59:58 -04001721 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001722 {
1723 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001724 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1725 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001726 c23 |= masked;
1727 }
1728
Nicolas Capens4f172c72016-01-13 08:34:30 -05001729 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1730 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001731 c23 |= value;
1732 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001733 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001734 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001735 case FORMAT_G8R8:
// Nothing is emitted when neither of the two channels is write-enabled.
1736 if((rgbaWriteMask & 0x00000003) != 0x0)
1737 {
// 2 bytes per pixel. Gather 32 bits (two pixels) from each of the two rows into one Int2.
1738 Pointer<Byte> buffer = cBuffer + 2 * x;
1739 Int2 value;
1740 value = Insert(value, *Pointer<Int>(buffer), 0);
1741 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1742 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1743
1744 Int2 packedCol = As<Int2>(current.x);
1745
// Start from the per-pixel mask and, for a partial channel write, AND in a channel mask.
1746 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1747 if((rgbaWriteMask & 0x3) != 0x3)
1748 {
// 5 * m repeats the 2-bit channel mask m in bits 0-1 and 2-3 (1 -> 0b0101, 2 -> 0b1010),
// so the 4-entry byte mask table can be reused for two 2-byte pixels.
1749 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1750 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1751 mergedMask &= rgbaMask;
1752 }
1753
// Select new bits where the merged mask is set and old bits elsewhere.
1754 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1755
1756 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1757 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1758 }
1759 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001760 case FORMAT_R8:
// Only bit 0 of the write mask matters for this single-channel format.
1761 if(rgbaWriteMask & 0x00000001)
1762 {
// 1 byte per pixel. Each 16-bit access covers two pixels; only elements 0 and 1 of
// 'value' are filled in (one per row), and only those two elements are stored back.
1763 Pointer<Byte> buffer = cBuffer + 1 * x;
1764 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001765 value = Insert(value, *Pointer<Short>(buffer), 0);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001766 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001767 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001768
// Here the byte mask tables are indexed by the per-pixel mask xMask, not by the write mask.
1769 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1770 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1771 current.x |= value;
1772
1773 *Pointer<Short>(buffer) = Extract(current.x, 0);
1774 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1775 }
1776 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001777 case FORMAT_A8:
// Same as FORMAT_R8, but driven by bit 3 of the write mask and sourced from current.w.
1778 if(rgbaWriteMask & 0x00000008)
1779 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001780 Pointer<Byte> buffer = cBuffer + 1 * x;
1781 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001782 value = Insert(value, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001783 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001784 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
John Bauman66b8ab22014-05-06 15:57:45 -04001785
Nicolas Capens4f172c72016-01-13 08:34:30 -05001786 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1787 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
John Bauman66b8ab22014-05-06 15:57:45 -04001788 current.w |= value;
1789
1790 *Pointer<Short>(buffer) = Extract(current.w, 0);
1791 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1792 }
1793 break;
John Bauman89401822014-05-06 15:04:28 -04001794 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001795 {
// 4 bytes per pixel; each 8-byte access covers two pixels. current.x holds the first
// row and current.y the second row (see the interleave in the first switch).
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001796 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001797
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001798 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001799
// Partial channel write: maskW01Q[mask] for the new color, maskW01Q[~mask] for the old.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001800 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001801 {
1802 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001803 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001804 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001805 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001806 }
1807
Nicolas Capens4f172c72016-01-13 08:34:30 -05001808 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1809 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001810 current.x |= value;
1811 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001812
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001813 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001814
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001815 value = *Pointer<Short4>(buffer);
1816
1817 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001818 {
1819 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001820 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001821 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001822 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001823 }
1824
Nicolas Capens4f172c72016-01-13 08:34:30 -05001825 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1826 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001827 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001828 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001829 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001830 break;
1831 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001832 {
// 8 bytes per pixel, so each Short4 access is exactly one pixel. After the transpose,
// current.x and current.y go to the first row (offsets 0 and 8) and current.z and
// current.w to the second row. Each pixel has its own per-pixel mask table
// (maskQ0Q .. maskQ3Q), all indexed by xMask.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001833 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001834
// First row, first pixel.
John Bauman89401822014-05-06 15:04:28 -04001835 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001836 Short4 value = *Pointer<Short4>(buffer);
1837
1838 if(rgbaWriteMask != 0x0000000F)
1839 {
1840 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001841 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1842 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001843 current.x |= masked;
1844 }
1845
Nicolas Capens4f172c72016-01-13 08:34:30 -05001846 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1847 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001848 current.x |= value;
1849 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001850 }
1851
// First row, second pixel.
John Bauman89401822014-05-06 15:04:28 -04001852 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001853 Short4 value = *Pointer<Short4>(buffer + 8);
1854
1855 if(rgbaWriteMask != 0x0000000F)
1856 {
1857 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001858 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1859 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001860 current.y |= masked;
1861 }
1862
Nicolas Capens4f172c72016-01-13 08:34:30 -05001863 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1864 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001865 current.y |= value;
1866 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001867 }
1868
// Advance to the second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001869 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001870
// Second row, first pixel.
1871 {
1872 Short4 value = *Pointer<Short4>(buffer);
1873
1874 if(rgbaWriteMask != 0x0000000F)
1875 {
1876 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001877 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1878 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001879 current.z |= masked;
1880 }
1881
Nicolas Capens4f172c72016-01-13 08:34:30 -05001882 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1883 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001884 current.z |= value;
1885 *Pointer<Short4>(buffer) = current.z;
1886 }
1887
// Second row, second pixel.
1888 {
1889 Short4 value = *Pointer<Short4>(buffer + 8);
1890
1891 if(rgbaWriteMask != 0x0000000F)
1892 {
1893 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001894 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1895 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001896 current.w |= masked;
1897 }
1898
Nicolas Capens4f172c72016-01-13 08:34:30 -05001899 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1900 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001901 current.w |= value;
1902 *Pointer<Short4>(buffer + 8) = current.w;
1903 }
John Bauman89401822014-05-06 15:04:28 -04001904 }
1905 break;
1906 default:
1907 ASSERT(false);
1908 }
1909 }
1910
Nicolas Capens96d4e092016-11-18 14:22:38 -05001911 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001912 {
1913 switch(blendFactorActive)
1914 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001915 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001916 // Optimized
1917 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001918 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001919 // Optimized
1920 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001921 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001922 blendFactor.x = oC.x;
1923 blendFactor.y = oC.y;
1924 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001925 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001926 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001927 blendFactor.x = Float4(1.0f) - oC.x;
1928 blendFactor.y = Float4(1.0f) - oC.y;
1929 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001931 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001932 blendFactor.x = pixel.x;
1933 blendFactor.y = pixel.y;
1934 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001935 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001936 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001937 blendFactor.x = Float4(1.0f) - pixel.x;
1938 blendFactor.y = Float4(1.0f) - pixel.y;
1939 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001940 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001941 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001942 blendFactor.x = oC.w;
1943 blendFactor.y = oC.w;
1944 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001945 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001946 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001947 blendFactor.x = Float4(1.0f) - oC.w;
1948 blendFactor.y = Float4(1.0f) - oC.w;
1949 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001950 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001951 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001952 blendFactor.x = pixel.w;
1953 blendFactor.y = pixel.w;
1954 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001955 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001956 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001957 blendFactor.x = Float4(1.0f) - pixel.w;
1958 blendFactor.y = Float4(1.0f) - pixel.w;
1959 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001960 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001961 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001962 blendFactor.x = Float4(1.0f) - pixel.w;
1963 blendFactor.x = Min(blendFactor.x, oC.w);
1964 blendFactor.y = blendFactor.x;
1965 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001966 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001967 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001968 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1969 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1970 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001971 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001972 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001973 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1974 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1975 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001976 break;
1977 default:
1978 ASSERT(false);
1979 }
1980 }
1981
Nicolas Capens96d4e092016-11-18 14:22:38 -05001982 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001983 {
1984 switch(blendFactorAlphaActive)
1985 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001986 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001987 // Optimized
1988 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001989 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001990 // Optimized
1991 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001992 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001993 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001994 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001995 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001996 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001997 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001998 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001999 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002000 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002001 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002002 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002003 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002004 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002005 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002006 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002007 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002008 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04002009 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002010 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002011 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002012 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002013 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002014 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002015 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002016 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04002017 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04002018 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002019 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002020 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002021 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002022 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002023 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002024 break;
2025 default:
2026 ASSERT(false);
2027 }
2028 }
2029
Nicolas Capens4f172c72016-01-13 08:34:30 -05002030 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04002031 {
2032 if(!state.alphaBlendActive)
2033 {
2034 return;
2035 }
2036
2037 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002038 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04002039
Alexis Hetu96517182015-04-15 10:30:23 -04002040 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04002041 Short4 c01;
2042 Short4 c23;
2043
Alexis Hetu1abb6382016-02-08 11:21:16 -05002044 Float4 one;
Alexis Hetu7208e932016-06-02 11:19:24 -04002045 if(Surface::isFloatFormat(state.targetFormat[index]))
John Bauman89401822014-05-06 15:04:28 -04002046 {
Alexis Hetu1abb6382016-02-08 11:21:16 -05002047 one = Float4(1.0f);
Alexis Hetu7208e932016-06-02 11:19:24 -04002048 }
2049 else if(Surface::isNonNormalizedInteger(state.targetFormat[index]))
2050 {
2051 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Alexis Hetu1abb6382016-02-08 11:21:16 -05002052 }
2053
2054 switch(state.targetFormat[index])
2055 {
2056 case FORMAT_R32I:
2057 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002058 case FORMAT_R32F:
2059 buffer = cBuffer;
2060 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002061 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
2062 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002063 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002064 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002065 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
2066 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
Alexis Hetu1abb6382016-02-08 11:21:16 -05002067 pixel.y = pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002068 break;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002069 case FORMAT_G32R32I:
2070 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002071 case FORMAT_G32R32F:
2072 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002073 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002074 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002075 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
2076 pixel.z = pixel.x;
2077 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
2078 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
2079 pixel.y = pixel.z;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002080 pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002081 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002082 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002083 case FORMAT_A32B32G32R32F:
Nicolas Capens67fdd832017-12-21 11:20:54 -05002084 case FORMAT_X32B32G32R32F_UNSIGNED:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002085 case FORMAT_A32B32G32R32I:
2086 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002087 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002088 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2089 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002090 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002091 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2092 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2093 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Nicolas Capens67fdd832017-12-21 11:20:54 -05002094 if(state.targetFormat[index] == FORMAT_X32B32G32R32F ||
2095 state.targetFormat[index] == FORMAT_X32B32G32R32F_UNSIGNED)
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002096 {
2097 pixel.w = Float4(1.0f);
2098 }
John Bauman89401822014-05-06 15:04:28 -04002099 break;
2100 default:
2101 ASSERT(false);
2102 }
2103
Alexis Hetu049a1872016-04-25 16:59:58 -04002104 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04002105 {
John Bauman19bac1e2014-05-06 15:23:49 -04002106 sRGBtoLinear(pixel.x);
2107 sRGBtoLinear(pixel.y);
2108 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002109 }
2110
2111 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002112 Vector4f sourceFactor;
2113 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002114
Nicolas Capens4f172c72016-01-13 08:34:30 -05002115 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
2116 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002117
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002118 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002119 {
John Bauman19bac1e2014-05-06 15:23:49 -04002120 oC.x *= sourceFactor.x;
2121 oC.y *= sourceFactor.y;
2122 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002123 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002124
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002125 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002126 {
John Bauman19bac1e2014-05-06 15:23:49 -04002127 pixel.x *= destFactor.x;
2128 pixel.y *= destFactor.y;
2129 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002130 }
2131
2132 switch(state.blendOperation)
2133 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002134 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002135 oC.x += pixel.x;
2136 oC.y += pixel.y;
2137 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002138 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002139 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002140 oC.x -= pixel.x;
2141 oC.y -= pixel.y;
2142 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002143 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002144 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002145 oC.x = pixel.x - oC.x;
2146 oC.y = pixel.y - oC.y;
2147 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002148 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002149 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002150 oC.x = Min(oC.x, pixel.x);
2151 oC.y = Min(oC.y, pixel.y);
2152 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002153 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002154 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002155 oC.x = Max(oC.x, pixel.x);
2156 oC.y = Max(oC.y, pixel.y);
2157 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002158 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002159 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002160 // No operation
2161 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002162 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002163 oC.x = pixel.x;
2164 oC.y = pixel.y;
2165 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002166 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002167 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002168 oC.x = Float4(0.0f);
2169 oC.y = Float4(0.0f);
2170 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002171 break;
2172 default:
2173 ASSERT(false);
2174 }
2175
Nicolas Capens4f172c72016-01-13 08:34:30 -05002176 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2177 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002178
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002179 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002180 {
John Bauman19bac1e2014-05-06 15:23:49 -04002181 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002182 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002183
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002184 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002185 {
John Bauman19bac1e2014-05-06 15:23:49 -04002186 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002187 }
2188
2189 switch(state.blendOperationAlpha)
2190 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002191 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002192 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002193 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002194 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002195 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002196 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002197 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002198 pixel.w -= oC.w;
2199 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002200 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002201 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002202 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002203 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002204 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002205 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002206 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002207 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002208 // No operation
2209 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002210 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002211 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002212 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002213 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002214 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002215 break;
2216 default:
2217 ASSERT(false);
2218 }
2219 }
2220
Nicolas Capens4f172c72016-01-13 08:34:30 -05002221 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002222 {
John Bauman89401822014-05-06 15:04:28 -04002223 switch(state.targetFormat[index])
2224 {
John Bauman89401822014-05-06 15:04:28 -04002225 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002226 case FORMAT_R32I:
2227 case FORMAT_R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002228 case FORMAT_R16I:
2229 case FORMAT_R16UI:
2230 case FORMAT_R8I:
2231 case FORMAT_R8UI:
John Bauman89401822014-05-06 15:04:28 -04002232 break;
2233 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002234 case FORMAT_G32R32I:
2235 case FORMAT_G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002236 case FORMAT_G16R16I:
2237 case FORMAT_G16R16UI:
2238 case FORMAT_G8R8I:
2239 case FORMAT_G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002240 oC.z = oC.x;
2241 oC.x = UnpackLow(oC.x, oC.y);
2242 oC.z = UnpackHigh(oC.z, oC.y);
2243 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002244 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002245 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002246 case FORMAT_A32B32G32R32F:
Nicolas Capens67fdd832017-12-21 11:20:54 -05002247 case FORMAT_X32B32G32R32F_UNSIGNED:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002248 case FORMAT_A32B32G32R32I:
2249 case FORMAT_A32B32G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002250 case FORMAT_A16B16G16R16I:
2251 case FORMAT_A16B16G16R16UI:
2252 case FORMAT_A8B8G8R8I:
2253 case FORMAT_A8B8G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002254 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002255 break;
2256 default:
2257 ASSERT(false);
2258 }
2259
2260 int rgbaWriteMask = state.colorWriteActive(index);
2261
2262 Int xMask; // Combination of all masks
2263
2264 if(state.depthTestActive)
2265 {
2266 xMask = zMask;
2267 }
2268 else
2269 {
2270 xMask = cMask;
2271 }
2272
2273 if(state.stencilActive)
2274 {
2275 xMask &= sMask;
2276 }
2277
2278 Pointer<Byte> buffer;
2279 Float4 value;
2280
2281 switch(state.targetFormat[index])
2282 {
2283 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002284 case FORMAT_R32I:
2285 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002286 if(rgbaWriteMask & 0x00000001)
2287 {
2288 buffer = cBuffer + 4 * x;
2289
2290 // FIXME: movlps
2291 value.x = *Pointer<Float>(buffer + 0);
2292 value.y = *Pointer<Float>(buffer + 4);
2293
Nicolas Capens4f172c72016-01-13 08:34:30 -05002294 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002295
2296 // FIXME: movhps
2297 value.z = *Pointer<Float>(buffer + 0);
2298 value.w = *Pointer<Float>(buffer + 4);
2299
Nicolas Capens4f172c72016-01-13 08:34:30 -05002300 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2301 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002302 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002303
2304 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002305 *Pointer<Float>(buffer + 0) = oC.x.z;
2306 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002307
Nicolas Capens4f172c72016-01-13 08:34:30 -05002308 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002309
2310 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002311 *Pointer<Float>(buffer + 0) = oC.x.x;
2312 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002313 }
2314 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002315 case FORMAT_R16I:
2316 case FORMAT_R16UI:
2317 if(rgbaWriteMask & 0x00000001)
2318 {
2319 buffer = cBuffer + 2 * x;
2320
2321 UShort4 xyzw;
2322 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2323
2324 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2325
2326 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2327 value = As<Float4>(Int4(xyzw));
2328
2329 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2330 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2331 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2332
2333 if(state.targetFormat[index] == FORMAT_R16I)
2334 {
2335 Float component = oC.x.z;
2336 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2337 component = oC.x.w;
2338 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2339
2340 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2341
2342 component = oC.x.x;
2343 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2344 component = oC.x.y;
2345 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2346 }
2347 else // FORMAT_R16UI
2348 {
2349 Float component = oC.x.z;
2350 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2351 component = oC.x.w;
2352 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2353
2354 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2355
2356 component = oC.x.x;
2357 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2358 component = oC.x.y;
2359 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2360 }
2361 }
2362 break;
2363 case FORMAT_R8I:
2364 case FORMAT_R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002365 if(rgbaWriteMask & 0x00000001)
2366 {
2367 buffer = cBuffer + x;
2368
2369 UInt xyzw, packedCol;
2370
Alexis Hetu827d07a2016-09-15 17:54:05 -04002371 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002372 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetu827d07a2016-09-15 17:54:05 -04002373 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002374
2375 Short4 tmpCol = Short4(As<Int4>(oC.x));
2376 if(state.targetFormat[index] == FORMAT_R8I)
2377 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002378 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002379 }
2380 else
2381 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002382 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002383 }
2384 packedCol = Extract(As<Int2>(tmpCol), 0);
2385
2386 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2387 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2388
2389 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2390 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2391 *Pointer<UShort>(buffer) = UShort(packedCol);
2392 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002393 break;
John Bauman89401822014-05-06 15:04:28 -04002394 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002395 case FORMAT_G32R32I:
2396 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002397 buffer = cBuffer + 8 * x;
2398
2399 value = *Pointer<Float4>(buffer);
2400
2401 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2402 {
2403 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002404 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002405 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002406 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002407 }
2408
Nicolas Capens4f172c72016-01-13 08:34:30 -05002409 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2410 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002411 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2412 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002413
Nicolas Capens4f172c72016-01-13 08:34:30 -05002414 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002415
2416 value = *Pointer<Float4>(buffer);
2417
2418 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2419 {
2420 Float4 masked;
2421
2422 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002423 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002424 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002425 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002426 }
2427
Nicolas Capens4f172c72016-01-13 08:34:30 -05002428 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2429 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002430 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2431 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002432 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002433 case FORMAT_G16R16I:
2434 case FORMAT_G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002435 if((rgbaWriteMask & 0x00000003) != 0x0)
2436 {
2437 buffer = cBuffer + 4 * x;
2438
2439 UInt2 rgbaMask;
2440 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2441 UShort4 value = *Pointer<UShort4>(buffer);
2442 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2443 if((rgbaWriteMask & 0x3) != 0x3)
2444 {
2445 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2446 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2447 mergedMask &= rgbaMask;
2448 }
2449 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2450
2451 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2452
2453 packedCol = UShort4(As<Int4>(oC.y));
2454 value = *Pointer<UShort4>(buffer);
2455 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2456 if((rgbaWriteMask & 0x3) != 0x3)
2457 {
2458 mergedMask &= rgbaMask;
2459 }
2460 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2461 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002462 break;
2463 case FORMAT_G8R8I:
2464 case FORMAT_G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002465 if((rgbaWriteMask & 0x00000003) != 0x0)
2466 {
2467 buffer = cBuffer + 2 * x;
2468
2469 Int2 xyzw, packedCol;
2470
2471 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2472 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2473 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2474
2475 if(state.targetFormat[index] == FORMAT_G8R8I)
2476 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002477 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002478 }
2479 else
2480 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002481 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002482 }
2483
2484 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2485 if((rgbaWriteMask & 0x3) != 0x3)
2486 {
2487 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2488 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2489 mergedMask &= rgbaMask;
2490 }
2491
2492 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2493
2494 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2495 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2496 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2497 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002498 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002499 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002500 case FORMAT_A32B32G32R32F:
Nicolas Capens67fdd832017-12-21 11:20:54 -05002501 case FORMAT_X32B32G32R32F_UNSIGNED:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002502 case FORMAT_A32B32G32R32I:
2503 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002504 buffer = cBuffer + 16 * x;
2505
2506 {
2507 value = *Pointer<Float4>(buffer, 16);
2508
2509 if(rgbaWriteMask != 0x0000000F)
2510 {
2511 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002512 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2513 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002514 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002515 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002516
Nicolas Capens4f172c72016-01-13 08:34:30 -05002517 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2518 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002519 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2520 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002521 }
2522
2523 {
2524 value = *Pointer<Float4>(buffer + 16, 16);
2525
2526 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens05b3d662016-02-25 23:58:33 -05002527 {
John Bauman89401822014-05-06 15:04:28 -04002528 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002529 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2530 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002531 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002532 }
2533
Nicolas Capens4f172c72016-01-13 08:34:30 -05002534 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2535 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002536 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2537 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002538 }
2539
Nicolas Capens4f172c72016-01-13 08:34:30 -05002540 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002541
2542 {
2543 value = *Pointer<Float4>(buffer, 16);
2544
2545 if(rgbaWriteMask != 0x0000000F)
2546 {
2547 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002548 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2549 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002550 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002551 }
2552
Nicolas Capens4f172c72016-01-13 08:34:30 -05002553 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2554 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002555 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2556 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002557 }
2558
2559 {
Nicolas Capens400667e2017-03-29 14:40:14 -04002560 value = *Pointer<Float4>(buffer + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002561
2562 if(rgbaWriteMask != 0x0000000F)
2563 {
2564 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002565 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2566 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002567 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002568 }
2569
Nicolas Capens4f172c72016-01-13 08:34:30 -05002570 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2571 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002572 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2573 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002574 }
2575 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002576 case FORMAT_A16B16G16R16I:
2577 case FORMAT_A16B16G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002578 if((rgbaWriteMask & 0x0000000F) != 0x0)
2579 {
2580 buffer = cBuffer + 8 * x;
2581
2582 UInt4 rgbaMask;
2583 UShort8 value = *Pointer<UShort8>(buffer);
2584 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2585 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2586 if((rgbaWriteMask & 0xF) != 0xF)
2587 {
2588 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2589 rgbaMask = UInt4(tmpMask, tmpMask);
2590 mergedMask &= rgbaMask;
2591 }
2592 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2593
2594 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2595
2596 value = *Pointer<UShort8>(buffer);
2597 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2598 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2599 if((rgbaWriteMask & 0xF) != 0xF)
2600 {
2601 mergedMask &= rgbaMask;
2602 }
2603 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2604 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002605 break;
2606 case FORMAT_A8B8G8R8I:
2607 case FORMAT_A8B8G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002608 if((rgbaWriteMask & 0x0000000F) != 0x0)
2609 {
2610 UInt2 value, packedCol, mergedMask;
2611
2612 buffer = cBuffer + 4 * x;
2613
2614 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2615 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002616 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002617 }
2618 else
2619 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002620 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002621 }
2622 value = *Pointer<UInt2>(buffer, 16);
2623 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2624 if(rgbaWriteMask != 0xF)
2625 {
2626 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2627 }
2628 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2629
2630 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2631
2632 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2633 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002634 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002635 }
2636 else
2637 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002638 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002639 }
2640 value = *Pointer<UInt2>(buffer, 16);
2641 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2642 if(rgbaWriteMask != 0xF)
2643 {
2644 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2645 }
2646 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2647 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002648 break;
John Bauman89401822014-05-06 15:04:28 -04002649 default:
2650 ASSERT(false);
2651 }
2652 }
2653
John Bauman89401822014-05-06 15:04:28 -04002654 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2655 {
John Bauman19bac1e2014-05-06 15:23:49 -04002656 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002657 }
2658
Nicolas Capens4f172c72016-01-13 08:34:30 -05002659 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002660 {
Nicolas Capens8f7739a2017-12-16 02:06:56 -05002661 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
2662
John Bauman19bac1e2014-05-06 15:23:49 -04002663 c.x = As<UShort4>(c.x) >> 4;
2664 c.y = As<UShort4>(c.y) >> 4;
2665 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002666
John Bauman19bac1e2014-05-06 15:23:49 -04002667 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2668 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2669 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2670 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002671
John Bauman19bac1e2014-05-06 15:23:49 -04002672 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2673 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2674 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2675 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002676
John Bauman19bac1e2014-05-06 15:23:49 -04002677 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2678 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2679 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2680 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002681 }
2682
Nicolas Capens4f172c72016-01-13 08:34:30 -05002683 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002684 {
John Bauman19bac1e2014-05-06 15:23:49 -04002685 c.x = As<UShort4>(c.x) >> 4;
2686 c.y = As<UShort4>(c.y) >> 4;
2687 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002688
Nicolas Capens4f172c72016-01-13 08:34:30 -05002689 linearToSRGB12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002690 }
2691
Nicolas Capens4f172c72016-01-13 08:34:30 -05002692 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002693 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002694 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002695
John Bauman19bac1e2014-05-06 15:23:49 -04002696 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2697 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2698 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2699 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002700
John Bauman19bac1e2014-05-06 15:23:49 -04002701 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2702 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2703 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2704 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002705
John Bauman19bac1e2014-05-06 15:23:49 -04002706 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2707 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2708 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2709 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002710 }
2711
John Bauman89401822014-05-06 15:04:28 -04002712 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2713 {
2714 Float4 linear = x * x;
2715 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2716
2717 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2718 }
2719
John Bauman19bac1e2014-05-06 15:23:49 -04002720 bool PixelRoutine::colorUsed()
2721 {
2722 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2723 }
John Bauman89401822014-05-06 15:04:28 -04002724}