blob: e510e6a546beaec9c9c41bc4266dc4d2092dad7a [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "PixelRoutine.hpp"
16
17#include "SamplerCore.hpp"
18#include "Constants.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050019#include "Device/Renderer.hpp"
20#include "Device/QuadRasterizer.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050021#include "Device/Primitive.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Chris Forbesc2968062019-03-19 16:48:03 -070023#include "Vulkan/VkPipelineLayout.hpp"
24
Nicolas Capens68a82382018-10-02 13:16:55 -040025namespace sw
26{
Ben Clayton76e9bc02019-02-26 15:02:18 +000027 PixelRoutine::PixelRoutine(
28 const PixelProcessor::State &state,
29 vk::PipelineLayout const *pipelineLayout,
Nicolas Capens09591b82019-04-08 22:51:08 -040030 SpirvShader const *spirvShader,
31 const vk::DescriptorSet::Bindings &descriptorSets)
Ben Clayton76e9bc02019-02-26 15:02:18 +000032 : QuadRasterizer(state, spirvShader),
Nicolas Capens09591b82019-04-08 22:51:08 -040033 routine(pipelineLayout),
34 descriptorSets(descriptorSets)
Nicolas Capens68a82382018-10-02 13:16:55 -040035 {
Chris Forbes6cf65f62019-05-15 13:00:40 -070036 if (spirvShader)
Nicolas Capens68a82382018-10-02 13:16:55 -040037 {
Chris Forbes6cf65f62019-05-15 13:00:40 -070038 spirvShader->emitProlog(&routine);
Alexis Hetu8af8b402019-05-28 14:48:19 -040039
40 // Clearing inputs to 0 is not demanded by the spec,
41 // but it makes the undefined behavior deterministic.
42 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
Nicolas Capens68a82382018-10-02 13:16:55 -040043 {
Alexis Hetu8af8b402019-05-28 14:48:19 -040044 routine.inputs[i] = Float4(0.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040045 }
46 }
47 }
48
49 PixelRoutine::~PixelRoutine()
50 {
51 }
52
53 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
54 {
Chris Forbeseae5b962019-04-19 17:01:10 -070055 // TODO: consider shader which modifies sample mask in general
Chris Forbes6cf65f62019-05-15 13:00:40 -070056 const bool earlyDepthTest = !spirvShader || (!spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
Nicolas Capens68a82382018-10-02 13:16:55 -040057
58 Int zMask[4]; // Depth mask
59 Int sMask[4]; // Stencil mask
60
61 for(unsigned int q = 0; q < state.multiSample; q++)
62 {
63 zMask[q] = cMask[q];
64 sMask[q] = cMask[q];
65 }
66
67 for(unsigned int q = 0; q < state.multiSample; q++)
68 {
69 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
70 }
71
72 Float4 f;
73 Float4 rhwCentroid;
74
75 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
76
77 if(interpolateZ())
78 {
79 for(unsigned int q = 0; q < state.multiSample; q++)
80 {
81 Float4 x = xxxx;
82
83 if(state.multiSample > 1)
84 {
85 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
86 }
87
88 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
89 }
90 }
91
92 Bool depthPass = false;
93
94 if(earlyDepthTest)
95 {
96 for(unsigned int q = 0; q < state.multiSample; q++)
97 {
98 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
99 }
100 }
101
102 If(depthPass || Bool(!earlyDepthTest))
103 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400104 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
105
106 // Centroid locations
107 Float4 XXXX = Float4(0.0f);
108 Float4 YYYY = Float4(0.0f);
109
110 if(state.centroid)
111 {
112 Float4 WWWW(1.0e-9f);
113
114 for(unsigned int q = 0; q < state.multiSample; q++)
115 {
116 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
117 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
118 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
119 }
120
121 WWWW = Rcp_pp(WWWW);
122 XXXX *= WWWW;
123 YYYY *= WWWW;
124
125 XXXX += xxxx;
126 YYYY += yyyy;
127 }
128
129 if(interpolateW())
130 {
131 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
132 rhw = reciprocal(w, false, false, true);
133
134 if(state.centroid)
135 {
136 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
137 }
138 }
139
Chris Forbes6cf65f62019-05-15 13:00:40 -0700140 if (spirvShader)
Nicolas Capens68a82382018-10-02 13:16:55 -0400141 {
Chris Forbes6cf65f62019-05-15 13:00:40 -0700142 for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400143 {
Chris Forbes6cf65f62019-05-15 13:00:40 -0700144 auto const &input = spirvShader->inputs[interpolant];
145 if (input.Type != SpirvShader::ATTRIBTYPE_UNUSED)
Nicolas Capens68a82382018-10-02 13:16:55 -0400146 {
Chris Forbes6cf65f62019-05-15 13:00:40 -0700147 if (input.Centroid && state.multiSample > 1)
148 {
149 routine.inputs[interpolant] =
150 interpolateCentroid(XXXX, YYYY, rhwCentroid,
151 primitive + OFFSET(Primitive, V[interpolant]),
152 input.Flat, !input.NoPerspective);
153 }
154 else
155 {
156 routine.inputs[interpolant] =
157 interpolate(xxxx, Dv[interpolant], rhw,
158 primitive + OFFSET(Primitive, V[interpolant]),
159 input.Flat, !input.NoPerspective, false);
160 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400161 }
162 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400163
Chris Forbes6cf65f62019-05-15 13:00:40 -0700164 setBuiltins(x, y, z, w);
165 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400166
Nicolas Capens68a82382018-10-02 13:16:55 -0400167 Bool alphaPass = true;
168
Chris Forbes71a1e012019-04-22 14:18:34 -0700169 if (spirvShader)
Nicolas Capens68a82382018-10-02 13:16:55 -0400170 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400171 applyShader(cMask);
Chris Forbes71a1e012019-04-22 14:18:34 -0700172 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400173
Chris Forbes71a1e012019-04-22 14:18:34 -0700174 alphaPass = alphaTest(cMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400175
Chris Forbes71a1e012019-04-22 14:18:34 -0700176 if((spirvShader && spirvShader->getModes().ContainsKill) || state.alphaToCoverage)
177 {
178 for(unsigned int q = 0; q < state.multiSample; q++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400179 {
Chris Forbes71a1e012019-04-22 14:18:34 -0700180 zMask[q] &= cMask[q];
181 sMask[q] &= cMask[q];
Nicolas Capens68a82382018-10-02 13:16:55 -0400182 }
183 }
184
185 If(alphaPass)
186 {
187 if(!earlyDepthTest)
188 {
189 for(unsigned int q = 0; q < state.multiSample; q++)
190 {
191 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
192 }
193 }
194
Nicolas Capens68a82382018-10-02 13:16:55 -0400195 If(depthPass || Bool(earlyDepthTest))
196 {
197 for(unsigned int q = 0; q < state.multiSample; q++)
198 {
199 if(state.multiSampleMask & (1 << q))
200 {
201 writeDepth(zBuffer, q, x, z[q], zMask[q]);
202
203 if(state.occlusionEnabled)
204 {
205 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
206 }
207 }
208 }
209
Chris Forbes71a1e012019-04-22 14:18:34 -0700210 rasterOperation(cBuffer, x, sMask, zMask, cMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400211 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400212 }
213 }
214
215 for(unsigned int q = 0; q < state.multiSample; q++)
216 {
217 if(state.multiSampleMask & (1 << q))
218 {
219 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
220 }
221 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400222 }
223
224 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
225 {
226 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
227
228 if(!flat)
229 {
230 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
231 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
232
233 if(perspective)
234 {
235 interpolant *= rhw;
236 }
237 }
238
239 return interpolant;
240 }
241
242 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
243 {
244 if(!state.stencilActive)
245 {
246 return;
247 }
248
249 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
250
251 Pointer<Byte> buffer = sBuffer + 2 * x;
252
253 if(q > 0)
254 {
255 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
256 }
257
258 Byte8 value = *Pointer<Byte8>(buffer);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700259 Byte8 valueBack = value;
Nicolas Capens68a82382018-10-02 13:16:55 -0400260
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700261 if(state.frontStencil.compareMask != 0xff)
Nicolas Capens68a82382018-10-02 13:16:55 -0400262 {
263 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
264 }
265
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700266 stencilTest(value, state.frontStencil.compareOp, false);
Nicolas Capens68a82382018-10-02 13:16:55 -0400267
268 if(state.twoSidedStencil)
269 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700270 if(state.backStencil.compareMask != 0xff)
Nicolas Capens68a82382018-10-02 13:16:55 -0400271 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700272 valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400273 }
274
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700275 stencilTest(valueBack, state.backStencil.compareOp, true);
Nicolas Capens68a82382018-10-02 13:16:55 -0400276
277 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700278 valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
279 value |= valueBack;
Nicolas Capens68a82382018-10-02 13:16:55 -0400280 }
281
282 sMask = SignMask(value) & cMask;
283 }
284
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700285 void PixelRoutine::stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack)
Nicolas Capens68a82382018-10-02 13:16:55 -0400286 {
287 Byte8 equal;
288
289 switch(stencilCompareMode)
290 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500291 case VK_COMPARE_OP_ALWAYS:
Nicolas Capens68a82382018-10-02 13:16:55 -0400292 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
293 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500294 case VK_COMPARE_OP_NEVER:
Nicolas Capens68a82382018-10-02 13:16:55 -0400295 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
296 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500297 case VK_COMPARE_OP_LESS: // a < b ~ b > a
Nicolas Capens68a82382018-10-02 13:16:55 -0400298 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700299 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400300 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500301 case VK_COMPARE_OP_EQUAL:
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700302 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400303 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500304 case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700305 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400306 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
307 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500308 case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b)
Nicolas Capens68a82382018-10-02 13:16:55 -0400309 equal = value;
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700310 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400311 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700312 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400313 value |= equal;
314 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500315 case VK_COMPARE_OP_GREATER: // a > b
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700316 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400317 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
318 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
319 value = equal;
320 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500321 case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a)
Nicolas Capens68a82382018-10-02 13:16:55 -0400322 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700323 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[isBack].referenceMaskedSignedQ)));
Nicolas Capens68a82382018-10-02 13:16:55 -0400324 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
325 break;
326 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100327 UNIMPLEMENTED("VkCompareOp: %d", int(stencilCompareMode));
Nicolas Capens68a82382018-10-02 13:16:55 -0400328 }
329 }
330
Chris Forbesbea47512019-03-12 14:50:55 -0700331 Bool PixelRoutine::depthTest32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400332 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400333 Float4 Z = z;
334
Chris Forbes1845d5e2018-12-27 11:50:15 -0800335 if(spirvShader && spirvShader->getModes().DepthReplacing)
Nicolas Capens68a82382018-10-02 13:16:55 -0400336 {
Chris Forbesb4de34e2019-03-12 13:01:45 -0700337 Z = oDepth;
Nicolas Capens68a82382018-10-02 13:16:55 -0400338 }
339
340 Pointer<Byte> buffer;
341 Int pitch;
342
343 if(!state.quadLayoutDepthBuffer)
344 {
345 buffer = zBuffer + 4 * x;
346 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
347 }
348 else
349 {
350 buffer = zBuffer + 8 * x;
351 }
352
353 if(q > 0)
354 {
355 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
356 }
357
358 Float4 zValue;
359
Alexis Hetudcb803a2018-11-15 16:25:38 -0500360 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
Nicolas Capens68a82382018-10-02 13:16:55 -0400361 {
362 if(!state.quadLayoutDepthBuffer)
363 {
364 // FIXME: Properly optimizes?
365 zValue.xy = *Pointer<Float4>(buffer);
366 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
367 }
368 else
369 {
370 zValue = *Pointer<Float4>(buffer, 16);
371 }
372 }
373
374 Int4 zTest;
375
376 switch(state.depthCompareMode)
377 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500378 case VK_COMPARE_OP_ALWAYS:
Nicolas Capens68a82382018-10-02 13:16:55 -0400379 // Optimized
380 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500381 case VK_COMPARE_OP_NEVER:
Nicolas Capens68a82382018-10-02 13:16:55 -0400382 // Optimized
383 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500384 case VK_COMPARE_OP_EQUAL:
Nicolas Capens68a82382018-10-02 13:16:55 -0400385 zTest = CmpEQ(zValue, Z);
386 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500387 case VK_COMPARE_OP_NOT_EQUAL:
Nicolas Capens68a82382018-10-02 13:16:55 -0400388 zTest = CmpNEQ(zValue, Z);
389 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500390 case VK_COMPARE_OP_LESS:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700391 zTest = CmpNLE(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400392 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500393 case VK_COMPARE_OP_GREATER_OR_EQUAL:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700394 zTest = CmpLE(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400395 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500396 case VK_COMPARE_OP_LESS_OR_EQUAL:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700397 zTest = CmpNLT(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400398 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500399 case VK_COMPARE_OP_GREATER:
Chris Forbesb4de34e2019-03-12 13:01:45 -0700400 zTest = CmpLT(zValue, Z);
Nicolas Capens68a82382018-10-02 13:16:55 -0400401 break;
402 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100403 UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
Nicolas Capens68a82382018-10-02 13:16:55 -0400404 }
405
406 switch(state.depthCompareMode)
407 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500408 case VK_COMPARE_OP_ALWAYS:
Nicolas Capens68a82382018-10-02 13:16:55 -0400409 zMask = cMask;
410 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500411 case VK_COMPARE_OP_NEVER:
Nicolas Capens68a82382018-10-02 13:16:55 -0400412 zMask = 0x0;
413 break;
414 default:
415 zMask = SignMask(zTest) & cMask;
416 break;
417 }
418
419 if(state.stencilActive)
420 {
421 zMask &= sMask;
422 }
423
424 return zMask != 0;
425 }
426
Chris Forbesbea47512019-03-12 14:50:55 -0700427 Bool PixelRoutine::depthTest16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
428 {
429 Short4 Z = convertFixed16(z, true);
430
431 if(spirvShader && spirvShader->getModes().DepthReplacing)
432 {
433 Z = convertFixed16(oDepth, true);
434 }
435
436 Pointer<Byte> buffer;
437 Int pitch;
438
439 if(!state.quadLayoutDepthBuffer)
440 {
441 buffer = zBuffer + 2 * x;
442 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
443 }
444 else
445 {
446 buffer = zBuffer + 4 * x;
447 }
448
449 if(q > 0)
450 {
451 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
452 }
453
454 Short4 zValue;
455
456 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
457 {
458 if(!state.quadLayoutDepthBuffer)
459 {
460 // FIXME: Properly optimizes?
461 zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
462 zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
463 }
464 else
465 {
466 zValue = *Pointer<Short4>(buffer, 8);
467 }
468 }
469
470 Int4 zTest;
471
472 // Bias values to make unsigned compares out of Reactor's (due SSE's) signed compares only
Alexis Hetu5078d482019-04-10 15:00:25 -0400473 zValue = zValue - Short4(0x8000u);
474 Z = Z - Short4(0x8000u);
Chris Forbesbea47512019-03-12 14:50:55 -0700475
476 switch(state.depthCompareMode)
477 {
478 case VK_COMPARE_OP_ALWAYS:
479 // Optimized
480 break;
481 case VK_COMPARE_OP_NEVER:
482 // Optimized
483 break;
484 case VK_COMPARE_OP_EQUAL:
485 zTest = Int4(CmpEQ(zValue, Z));
486 break;
487 case VK_COMPARE_OP_NOT_EQUAL:
488 zTest = ~Int4(CmpEQ(zValue, Z));
489 break;
490 case VK_COMPARE_OP_LESS:
491 zTest = Int4(CmpGT(zValue, Z));
492 break;
493 case VK_COMPARE_OP_GREATER_OR_EQUAL:
494 zTest = ~Int4(CmpGT(zValue, Z));
495 break;
496 case VK_COMPARE_OP_LESS_OR_EQUAL:
497 zTest = ~Int4(CmpGT(Z, zValue));
498 break;
499 case VK_COMPARE_OP_GREATER:
500 zTest = Int4(CmpGT(Z, zValue));
501 break;
502 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100503 UNIMPLEMENTED("VkCompareOp: %d", int(state.depthCompareMode));
Chris Forbesbea47512019-03-12 14:50:55 -0700504 }
505
506 switch(state.depthCompareMode)
507 {
508 case VK_COMPARE_OP_ALWAYS:
509 zMask = cMask;
510 break;
511 case VK_COMPARE_OP_NEVER:
512 zMask = 0x0;
513 break;
514 default:
515 zMask = SignMask(zTest) & cMask;
516 break;
517 }
518
519 if(state.stencilActive)
520 {
521 zMask &= sMask;
522 }
523
524 return zMask != 0;
525 }
526
527 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
528 {
529 if(!state.depthTestActive)
530 {
531 return true;
532 }
533
534 if (state.depthFormat == VK_FORMAT_D16_UNORM)
535 return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
536 else
537 return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
538 }
539
Nicolas Capens68a82382018-10-02 13:16:55 -0400540 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
541 {
542 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
543 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
544 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
545 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
546
547 Int aMask0 = SignMask(coverage0);
548 Int aMask1 = SignMask(coverage1);
549 Int aMask2 = SignMask(coverage2);
550 Int aMask3 = SignMask(coverage3);
551
552 cMask[0] &= aMask0;
553 cMask[1] &= aMask1;
554 cMask[2] &= aMask2;
555 cMask[3] &= aMask3;
556 }
557
Chris Forbesbea47512019-03-12 14:50:55 -0700558 void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400559 {
Nicolas Capens68a82382018-10-02 13:16:55 -0400560 Float4 Z = z;
561
Chris Forbes1845d5e2018-12-27 11:50:15 -0800562 if(spirvShader && spirvShader->getModes().DepthReplacing)
Nicolas Capens68a82382018-10-02 13:16:55 -0400563 {
Chris Forbesb4de34e2019-03-12 13:01:45 -0700564 Z = oDepth;
Nicolas Capens68a82382018-10-02 13:16:55 -0400565 }
566
567 Pointer<Byte> buffer;
568 Int pitch;
569
570 if(!state.quadLayoutDepthBuffer)
571 {
572 buffer = zBuffer + 4 * x;
573 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
574 }
575 else
576 {
577 buffer = zBuffer + 8 * x;
578 }
579
580 if(q > 0)
581 {
582 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
583 }
584
585 Float4 zValue;
586
Alexis Hetudcb803a2018-11-15 16:25:38 -0500587 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
Nicolas Capens68a82382018-10-02 13:16:55 -0400588 {
589 if(!state.quadLayoutDepthBuffer)
590 {
591 // FIXME: Properly optimizes?
592 zValue.xy = *Pointer<Float4>(buffer);
593 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
594 }
595 else
596 {
597 zValue = *Pointer<Float4>(buffer, 16);
598 }
599 }
600
601 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
602 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
603 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
604
605 if(!state.quadLayoutDepthBuffer)
606 {
607 // FIXME: Properly optimizes?
608 *Pointer<Float2>(buffer) = Float2(Z.xy);
609 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
610 }
611 else
612 {
613 *Pointer<Float4>(buffer, 16) = Z;
614 }
615 }
616
Chris Forbesbea47512019-03-12 14:50:55 -0700617 void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
618 {
619 Short4 Z = As<Short4>(convertFixed16(z, true));
620
621 if(spirvShader && spirvShader->getModes().DepthReplacing)
622 {
623 Z = As<Short4>(convertFixed16(oDepth, true));
624 }
625
626 Pointer<Byte> buffer;
627 Int pitch;
628
629 if(!state.quadLayoutDepthBuffer)
630 {
631 buffer = zBuffer + 2 * x;
632 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
633 }
634 else
635 {
636 buffer = zBuffer + 4 * x;
637 }
638
639 if(q > 0)
640 {
641 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
642 }
643
644 Short4 zValue;
645
646 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
647 {
648 if(!state.quadLayoutDepthBuffer)
649 {
650 // FIXME: Properly optimizes?
651 zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
652 zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
653 }
654 else
655 {
656 zValue = *Pointer<Short4>(buffer, 8);
657 }
658 }
659
660 Z = Z & *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q) + zMask * 8, 8);
661 zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q) + zMask * 8, 8);
662 Z = Z | zValue;
663
664 if(!state.quadLayoutDepthBuffer)
665 {
666 // FIXME: Properly optimizes?
667 *Pointer<Short>(buffer) = Extract(Z, 0);
668 *Pointer<Short>(buffer+2) = Extract(Z, 1);
669 *Pointer<Short>(buffer+pitch) = Extract(Z, 2);
670 *Pointer<Short>(buffer+pitch+2) = Extract(Z, 3);
671 }
672 else
673 {
674 *Pointer<Short4>(buffer, 8) = Z;
675 }
676 }
677
678 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
679 {
680 if(!state.depthWriteEnable)
681 {
682 return;
683 }
684
685 if (state.depthFormat == VK_FORMAT_D16_UNORM)
686 writeDepth16(zBuffer, q, x, z, zMask);
687 else
688 writeDepth32F(zBuffer, q, x, z, zMask);
689 }
690
Nicolas Capens68a82382018-10-02 13:16:55 -0400691 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
692 {
693 if(!state.stencilActive)
694 {
695 return;
696 }
697
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700698 if(state.frontStencil.passOp == VK_STENCIL_OP_KEEP && state.frontStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.frontStencil.failOp == VK_STENCIL_OP_KEEP)
Nicolas Capens68a82382018-10-02 13:16:55 -0400699 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700700 if(!state.twoSidedStencil || (state.backStencil.passOp == VK_STENCIL_OP_KEEP && state.backStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.backStencil.failOp == VK_STENCIL_OP_KEEP))
Nicolas Capens68a82382018-10-02 13:16:55 -0400701 {
702 return;
703 }
704 }
705
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700706 if((state.frontStencil.writeMask == 0) && (!state.twoSidedStencil || (state.backStencil.writeMask == 0)))
Nicolas Capens68a82382018-10-02 13:16:55 -0400707 {
708 return;
709 }
710
711 Pointer<Byte> buffer = sBuffer + 2 * x;
712
713 if(q > 0)
714 {
715 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
716 }
717
718 Byte8 bufferValue = *Pointer<Byte8>(buffer);
719
720 Byte8 newValue;
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700721 stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400722
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700723 if(state.frontStencil.writeMask != 0)
Nicolas Capens68a82382018-10-02 13:16:55 -0400724 {
725 Byte8 maskedValue = bufferValue;
726 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
727 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
728 newValue |= maskedValue;
729 }
730
731 if(state.twoSidedStencil)
732 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700733 Byte8 newValueBack;
Nicolas Capens68a82382018-10-02 13:16:55 -0400734
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700735 stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask, sMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400736
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700737 if(state.backStencil.writeMask != 0)
Nicolas Capens68a82382018-10-02 13:16:55 -0400738 {
739 Byte8 maskedValue = bufferValue;
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700740 newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400741 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700742 newValueBack |= maskedValue;
Nicolas Capens68a82382018-10-02 13:16:55 -0400743 }
744
745 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700746 newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
747 newValue |= newValueBack;
Nicolas Capens68a82382018-10-02 13:16:55 -0400748 }
749
750 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
751 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
752 newValue |= bufferValue;
753
754 *Pointer<Byte4>(buffer) = Byte4(newValue);
755 }
756
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700757 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, VkStencilOpState const &ops, bool isBack, Int &zMask, Int &sMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400758 {
759 Byte8 &pass = newValue;
760 Byte8 fail;
761 Byte8 zFail;
762
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700763 stencilOperation(pass, bufferValue, ops.passOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400764
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700765 if(ops.depthFailOp != ops.passOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400766 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700767 stencilOperation(zFail, bufferValue, ops.depthFailOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400768 }
769
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700770 if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400771 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700772 stencilOperation(fail, bufferValue, ops.failOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400773 }
774
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700775 if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400776 {
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700777 if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same
Nicolas Capens68a82382018-10-02 13:16:55 -0400778 {
779 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
780 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
781 pass |= zFail;
782 }
783
784 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
785 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
786 pass |= fail;
787 }
788 }
789
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700790 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, VkStencilOp operation, bool isBack)
Nicolas Capens68a82382018-10-02 13:16:55 -0400791 {
792 switch(operation)
793 {
Alexis Hetudcb803a2018-11-15 16:25:38 -0500794 case VK_STENCIL_OP_KEEP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400795 output = bufferValue;
796 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500797 case VK_STENCIL_OP_ZERO:
Nicolas Capens68a82382018-10-02 13:16:55 -0400798 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
799 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500800 case VK_STENCIL_OP_REPLACE:
Chris Forbes1bd9e2f2019-03-18 11:41:56 -0700801 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[isBack].referenceQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400802 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500803 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400804 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
805 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500806 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400807 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
808 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500809 case VK_STENCIL_OP_INVERT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400810 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
811 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500812 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400813 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
814 break;
Alexis Hetudcb803a2018-11-15 16:25:38 -0500815 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
Nicolas Capens68a82382018-10-02 13:16:55 -0400816 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
817 break;
818 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100819 UNIMPLEMENTED("VkStencilOp: %d", int(operation));
Nicolas Capens68a82382018-10-02 13:16:55 -0400820 }
821 }
822
Alexis Hetu4ad23222018-11-22 16:40:52 -0500823 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorActive)
Nicolas Capens68a82382018-10-02 13:16:55 -0400824 {
825 switch(blendFactorActive)
826 {
Alexis Hetu4ad23222018-11-22 16:40:52 -0500827 case VK_BLEND_FACTOR_ZERO:
Nicolas Capens68a82382018-10-02 13:16:55 -0400828 // Optimized
829 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500830 case VK_BLEND_FACTOR_ONE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400831 // Optimized
832 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500833 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400834 blendFactor.x = current.x;
835 blendFactor.y = current.y;
836 blendFactor.z = current.z;
837 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500838 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400839 blendFactor.x = Short4(0xFFFFu) - current.x;
840 blendFactor.y = Short4(0xFFFFu) - current.y;
841 blendFactor.z = Short4(0xFFFFu) - current.z;
842 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500843 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400844 blendFactor.x = pixel.x;
845 blendFactor.y = pixel.y;
846 blendFactor.z = pixel.z;
847 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500848 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400849 blendFactor.x = Short4(0xFFFFu) - pixel.x;
850 blendFactor.y = Short4(0xFFFFu) - pixel.y;
851 blendFactor.z = Short4(0xFFFFu) - pixel.z;
852 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500853 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400854 blendFactor.x = current.w;
855 blendFactor.y = current.w;
856 blendFactor.z = current.w;
857 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500858 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400859 blendFactor.x = Short4(0xFFFFu) - current.w;
860 blendFactor.y = Short4(0xFFFFu) - current.w;
861 blendFactor.z = Short4(0xFFFFu) - current.w;
862 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500863 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400864 blendFactor.x = pixel.w;
865 blendFactor.y = pixel.w;
866 blendFactor.z = pixel.w;
867 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500868 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400869 blendFactor.x = Short4(0xFFFFu) - pixel.w;
870 blendFactor.y = Short4(0xFFFFu) - pixel.w;
871 blendFactor.z = Short4(0xFFFFu) - pixel.w;
872 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500873 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400874 blendFactor.x = Short4(0xFFFFu) - pixel.w;
875 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
876 blendFactor.y = blendFactor.x;
877 blendFactor.z = blendFactor.x;
878 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500879 case VK_BLEND_FACTOR_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400880 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
881 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
882 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
883 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500884 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400885 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
886 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
887 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
888 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500889 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400890 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
891 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
892 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
893 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500894 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400895 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
896 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
897 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
898 break;
899 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100900 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
Nicolas Capens68a82382018-10-02 13:16:55 -0400901 }
902 }
903
Alexis Hetu4ad23222018-11-22 16:40:52 -0500904 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorAlphaActive)
Nicolas Capens68a82382018-10-02 13:16:55 -0400905 {
906 switch(blendFactorAlphaActive)
907 {
Alexis Hetu4ad23222018-11-22 16:40:52 -0500908 case VK_BLEND_FACTOR_ZERO:
Nicolas Capens68a82382018-10-02 13:16:55 -0400909 // Optimized
910 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500911 case VK_BLEND_FACTOR_ONE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400912 // Optimized
913 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500914 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400915 blendFactor.w = current.w;
916 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500917 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400918 blendFactor.w = Short4(0xFFFFu) - current.w;
919 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500920 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400921 blendFactor.w = pixel.w;
922 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500923 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -0400924 blendFactor.w = Short4(0xFFFFu) - pixel.w;
925 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500926 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400927 blendFactor.w = current.w;
928 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500929 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400930 blendFactor.w = Short4(0xFFFFu) - current.w;
931 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500932 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400933 blendFactor.w = pixel.w;
934 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500935 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400936 blendFactor.w = Short4(0xFFFFu) - pixel.w;
937 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500938 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -0400939 blendFactor.w = Short4(0xFFFFu);
940 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500941 case VK_BLEND_FACTOR_CONSTANT_COLOR:
942 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400943 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
944 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -0500945 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
946 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -0400947 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
948 break;
949 default:
Ben Clayton3bb94902019-04-07 13:10:54 +0100950 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
Nicolas Capens68a82382018-10-02 13:16:55 -0400951 }
952 }
953
954 bool PixelRoutine::isSRGB(int index) const
955 {
Alexis Hetu25ec7b02019-03-12 14:19:22 -0400956 return vk::Format(state.targetFormat[index]).isSRGBformat();
Nicolas Capens68a82382018-10-02 13:16:55 -0400957 }
958
959 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
960 {
961 Short4 c01;
962 Short4 c23;
963 Pointer<Byte> buffer;
964 Pointer<Byte> buffer2;
965
966 switch(state.targetFormat[index])
967 {
Alexis Hetudd152e12018-11-14 13:39:28 -0500968 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -0400969 buffer = cBuffer + 2 * x;
970 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
971 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
972
973 pixel.x = c01 & Short4(0xF800u);
974 pixel.y = (c01 & Short4(0x07E0u)) << 5;
975 pixel.z = (c01 & Short4(0x001Fu)) << 11;
976 pixel.w = Short4(0xFFFFu);
977 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500978 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -0700979 case VK_FORMAT_B8G8R8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400980 buffer = cBuffer + 4 * x;
981 c01 = *Pointer<Short4>(buffer);
982 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
983 c23 = *Pointer<Short4>(buffer);
984 pixel.z = c01;
985 pixel.y = c01;
986 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
987 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
988 pixel.x = pixel.z;
989 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
990 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
991 pixel.y = pixel.z;
992 pixel.w = pixel.x;
993 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
994 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
995 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
996 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
997 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500998 case VK_FORMAT_R8G8B8A8_UNORM:
999 case VK_FORMAT_R8G8B8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001000 buffer = cBuffer + 4 * x;
1001 c01 = *Pointer<Short4>(buffer);
1002 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1003 c23 = *Pointer<Short4>(buffer);
1004 pixel.z = c01;
1005 pixel.y = c01;
1006 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1007 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1008 pixel.x = pixel.z;
1009 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1010 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1011 pixel.y = pixel.z;
1012 pixel.w = pixel.x;
1013 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1014 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1015 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1016 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1017 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001018 case VK_FORMAT_R8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001019 buffer = cBuffer + 1 * x;
1020 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
1021 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1022 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1023 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1024 pixel.y = Short4(0x0000);
1025 pixel.z = Short4(0x0000);
1026 pixel.w = Short4(0xFFFFu);
1027 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001028 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001029 buffer = cBuffer + 2 * x;
1030 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
1031 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1032 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1033 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
1034 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1035 pixel.z = Short4(0x0000u);
1036 pixel.w = Short4(0xFFFFu);
1037 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001038 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001039 buffer = cBuffer;
1040 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1041 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
1042 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1043 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1044 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1045 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
1046 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001047 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001048 buffer = cBuffer;
1049 pixel.x = *Pointer<Short4>(buffer + 4 * x);
1050 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1051 pixel.y = *Pointer<Short4>(buffer + 4 * x);
1052 pixel.z = pixel.x;
1053 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1054 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1055 pixel.y = pixel.z;
1056 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1057 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1058 pixel.z = Short4(0xFFFFu);
1059 pixel.w = Short4(0xFFFFu);
1060 break;
1061 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001062 UNIMPLEMENTED("VkFormat %d", state.targetFormat[index]);
Nicolas Capens68a82382018-10-02 13:16:55 -04001063 }
1064
Alexis Hetu8af8b402019-05-28 14:48:19 -04001065 if(isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001066 {
1067 sRGBtoLinear16_12_16(pixel);
1068 }
1069 }
1070
1071 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
1072 {
1073 if(!state.alphaBlendActive)
1074 {
1075 return;
1076 }
1077
1078 Vector4s pixel;
1079 readPixel(index, cBuffer, x, pixel);
1080
1081 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1082 Vector4s sourceFactor;
1083 Vector4s destFactor;
1084
1085 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1086 blendFactor(destFactor, current, pixel, state.destBlendFactor);
1087
Alexis Hetu4ad23222018-11-22 16:40:52 -05001088 if(state.sourceBlendFactor != VK_BLEND_FACTOR_ONE && state.sourceBlendFactor != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001089 {
1090 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1091 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1092 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
1093 }
1094
Alexis Hetu4ad23222018-11-22 16:40:52 -05001095 if(state.destBlendFactor != VK_BLEND_FACTOR_ONE && state.destBlendFactor != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001096 {
1097 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1098 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1099 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
1100 }
1101
1102 switch(state.blendOperation)
1103 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001104 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001105 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1106 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1107 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1108 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001109 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001110 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1111 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1112 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1113 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001114 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001115 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1116 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1117 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
1118 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001119 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04001120 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1121 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1122 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
1123 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001124 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04001125 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1126 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1127 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
1128 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001129 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001130 // No operation
1131 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001132 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001133 current.x = pixel.x;
1134 current.y = pixel.y;
1135 current.z = pixel.z;
1136 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001137 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001138 current.x = Short4(0x0000);
1139 current.y = Short4(0x0000);
1140 current.z = Short4(0x0000);
1141 break;
1142 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001143 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperation));
Nicolas Capens68a82382018-10-02 13:16:55 -04001144 }
1145
1146 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1147 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
1148
Alexis Hetu4ad23222018-11-22 16:40:52 -05001149 if(state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001150 {
1151 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
1152 }
1153
Alexis Hetu4ad23222018-11-22 16:40:52 -05001154 if(state.destBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.destBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001155 {
1156 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
1157 }
1158
1159 switch(state.blendOperationAlpha)
1160 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001161 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001162 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1163 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001164 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001165 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1166 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001167 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001168 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
1169 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001170 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04001171 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
1172 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001173 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04001174 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
1175 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001176 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001177 // No operation
1178 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001179 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001180 current.w = pixel.w;
1181 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001182 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001183 current.w = Short4(0x0000);
1184 break;
1185 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001186 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperationAlpha));
Nicolas Capens68a82382018-10-02 13:16:55 -04001187 }
1188 }
1189
Nicolas Capens68a82382018-10-02 13:16:55 -04001190 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
1191 {
Alexis Hetu8af8b402019-05-28 14:48:19 -04001192 if(isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001193 {
1194 linearToSRGB16_12_16(current);
1195 }
1196
Alexis Hetu8af8b402019-05-28 14:48:19 -04001197 switch(state.targetFormat[index])
Nicolas Capens68a82382018-10-02 13:16:55 -04001198 {
Alexis Hetu8af8b402019-05-28 14:48:19 -04001199 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1200 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1201 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1202 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
1203 break;
1204 case VK_FORMAT_B8G8R8A8_UNORM:
1205 case VK_FORMAT_B8G8R8A8_SRGB:
1206 case VK_FORMAT_R8G8B8A8_UNORM:
1207 case VK_FORMAT_R8G8B8A8_SRGB:
1208 case VK_FORMAT_R8G8_UNORM:
1209 case VK_FORMAT_R8_UNORM:
1210 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1211 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1212 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1213 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1214 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1215 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
1216 break;
1217 default:
1218 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001219 }
1220
1221 int rgbaWriteMask = state.colorWriteActive(index);
1222 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1223
1224 switch(state.targetFormat[index])
1225 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001226 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001227 {
1228 current.x = current.x & Short4(0xF800u);
1229 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1230 current.z = As<UShort4>(current.z) >> 11;
1231
1232 current.x = current.x | current.y | current.z;
1233 }
1234 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001235 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001236 case VK_FORMAT_B8G8R8A8_SRGB:
Alexis Hetu8aa74a42018-10-22 14:54:09 -04001237 if(rgbaWriteMask == 0x7)
Nicolas Capens68a82382018-10-02 13:16:55 -04001238 {
1239 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1240 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1241 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1242
1243 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1244 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1245
1246 current.x = current.z;
1247 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1248 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1249 current.y = current.z;
1250 current.z = As<Short4>(UnpackLow(current.z, current.x));
1251 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1252 }
1253 else
1254 {
1255 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1256 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1257 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1258 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1259
1260 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1261 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1262
1263 current.x = current.z;
1264 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1265 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1266 current.y = current.z;
1267 current.z = As<Short4>(UnpackLow(current.z, current.x));
1268 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1269 }
1270 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001271 case VK_FORMAT_R8G8B8A8_UNORM:
1272 case VK_FORMAT_R8G8B8A8_SRGB:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001273 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1274 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Alexis Hetu8aa74a42018-10-22 14:54:09 -04001275 if(rgbaWriteMask == 0x7)
Nicolas Capens68a82382018-10-02 13:16:55 -04001276 {
1277 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1278 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1279 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1280
1281 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1282 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1283
1284 current.x = current.z;
1285 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1286 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1287 current.y = current.z;
1288 current.z = As<Short4>(UnpackLow(current.z, current.x));
1289 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1290 }
1291 else
1292 {
1293 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1294 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1295 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1296 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1297
1298 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1299 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1300
1301 current.x = current.z;
1302 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1303 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1304 current.y = current.z;
1305 current.z = As<Short4>(UnpackLow(current.z, current.x));
1306 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1307 }
1308 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001309 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001310 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1311 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1312 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1313 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1314 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1315 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001316 case VK_FORMAT_R8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001317 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1318 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1319 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001320 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001321 current.z = current.x;
1322 current.x = As<Short4>(UnpackLow(current.x, current.y));
1323 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1324 current.y = current.z;
1325 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001326 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001327 transpose4x4(current.x, current.y, current.z, current.w);
1328 break;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001329 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1330 {
1331 auto r = Int4(current.x) & Int4(0x3ff);
1332 auto g = Int4(current.y) & Int4(0x3ff);
1333 auto b = Int4(current.z) & Int4(0x3ff);
1334 auto a = Int4(current.w) & Int4(0x3);
1335 Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
1336 auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
1337 auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
1338 current.x = UnpackLow(c02, c13);
1339 current.y = UnpackHigh(c02, c13);
1340 break;
1341 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001342 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001343 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001344 }
1345
1346 Short4 c01 = current.z;
1347 Short4 c23 = current.y;
1348
1349 Int xMask; // Combination of all masks
1350
1351 if(state.depthTestActive)
1352 {
1353 xMask = zMask;
1354 }
1355 else
1356 {
1357 xMask = cMask;
1358 }
1359
1360 if(state.stencilActive)
1361 {
1362 xMask &= sMask;
1363 }
1364
1365 switch(state.targetFormat[index])
1366 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001367 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001368 {
1369 Pointer<Byte> buffer = cBuffer + 2 * x;
1370 Int value = *Pointer<Int>(buffer);
1371
1372 Int c01 = Extract(As<Int2>(current.x), 0);
1373
1374 if((bgraWriteMask & 0x00000007) != 0x00000007)
1375 {
1376 Int masked = value;
1377 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1378 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
1379 c01 |= masked;
1380 }
1381
1382 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1383 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
1384 c01 |= value;
1385 *Pointer<Int>(buffer) = c01;
1386
1387 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1388 value = *Pointer<Int>(buffer);
1389
1390 Int c23 = Extract(As<Int2>(current.x), 1);
1391
1392 if((bgraWriteMask & 0x00000007) != 0x00000007)
1393 {
1394 Int masked = value;
1395 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1396 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
1397 c23 |= masked;
1398 }
1399
1400 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1401 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
1402 c23 |= value;
1403 *Pointer<Int>(buffer) = c23;
1404 }
1405 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001406 case VK_FORMAT_B8G8R8A8_UNORM:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001407 case VK_FORMAT_B8G8R8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -04001408 {
1409 Pointer<Byte> buffer = cBuffer + x * 4;
1410 Short4 value = *Pointer<Short4>(buffer);
1411
Chris Forbes6407c1a2019-04-15 17:22:57 -07001412 if(bgraWriteMask != 0x0000000F) // FIXME: Need for masking when XRGB && Fh?
Nicolas Capens68a82382018-10-02 13:16:55 -04001413 {
1414 Short4 masked = value;
1415 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1416 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1417 c01 |= masked;
1418 }
1419
1420 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1421 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1422 c01 |= value;
1423 *Pointer<Short4>(buffer) = c01;
1424
1425 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1426 value = *Pointer<Short4>(buffer);
1427
Chris Forbes6407c1a2019-04-15 17:22:57 -07001428 if(bgraWriteMask != 0x0000000F) // FIXME: Need for masking when XRGB && Fh?
Nicolas Capens68a82382018-10-02 13:16:55 -04001429 {
1430 Short4 masked = value;
1431 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1432 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1433 c23 |= masked;
1434 }
1435
1436 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1437 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1438 c23 |= value;
1439 *Pointer<Short4>(buffer) = c23;
1440 }
1441 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001442 case VK_FORMAT_R8G8B8A8_UNORM:
1443 case VK_FORMAT_R8G8B8A8_SRGB:
Chris Forbes6407c1a2019-04-15 17:22:57 -07001444 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1445 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04001446 {
1447 Pointer<Byte> buffer = cBuffer + x * 4;
1448 Short4 value = *Pointer<Short4>(buffer);
1449
Chris Forbes6407c1a2019-04-15 17:22:57 -07001450 bool masked = (rgbaWriteMask != 0x0000000F); // FIXME: Need for masking when XBGR && Fh?
Nicolas Capens68a82382018-10-02 13:16:55 -04001451
1452 if(masked)
1453 {
1454 Short4 masked = value;
1455 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1456 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1457 c01 |= masked;
1458 }
1459
1460 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1461 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1462 c01 |= value;
1463 *Pointer<Short4>(buffer) = c01;
1464
1465 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1466 value = *Pointer<Short4>(buffer);
1467
1468 if(masked)
1469 {
1470 Short4 masked = value;
1471 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1472 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1473 c23 |= masked;
1474 }
1475
1476 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1477 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1478 c23 |= value;
1479 *Pointer<Short4>(buffer) = c23;
1480 }
1481 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001482 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001483 if((rgbaWriteMask & 0x00000003) != 0x0)
1484 {
1485 Pointer<Byte> buffer = cBuffer + 2 * x;
1486 Int2 value;
1487 value = Insert(value, *Pointer<Int>(buffer), 0);
1488 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1489 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1490
1491 Int2 packedCol = As<Int2>(current.x);
1492
1493 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1494 if((rgbaWriteMask & 0x3) != 0x3)
1495 {
1496 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1497 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1498 mergedMask &= rgbaMask;
1499 }
1500
1501 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1502
1503 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1504 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1505 }
1506 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001507 case VK_FORMAT_R8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001508 if(rgbaWriteMask & 0x00000001)
1509 {
1510 Pointer<Byte> buffer = cBuffer + 1 * x;
1511 Short4 value;
1512 value = Insert(value, *Pointer<Short>(buffer), 0);
1513 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1514 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
1515
1516 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1517 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1518 current.x |= value;
1519
1520 *Pointer<Short>(buffer) = Extract(current.x, 0);
1521 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1522 }
1523 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001524 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001525 {
1526 Pointer<Byte> buffer = cBuffer + 4 * x;
1527
1528 Short4 value = *Pointer<Short4>(buffer);
1529
1530 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1531 {
1532 Short4 masked = value;
1533 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1534 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
1535 current.x |= masked;
1536 }
1537
1538 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1539 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1540 current.x |= value;
1541 *Pointer<Short4>(buffer) = current.x;
1542
1543 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1544
1545 value = *Pointer<Short4>(buffer);
1546
1547 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1548 {
1549 Short4 masked = value;
1550 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1551 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
1552 current.y |= masked;
1553 }
1554
1555 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1556 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1557 current.y |= value;
1558 *Pointer<Short4>(buffer) = current.y;
1559 }
1560 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001561 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001562 {
1563 Pointer<Byte> buffer = cBuffer + 8 * x;
1564
1565 {
1566 Short4 value = *Pointer<Short4>(buffer);
1567
1568 if(rgbaWriteMask != 0x0000000F)
1569 {
1570 Short4 masked = value;
1571 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1572 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1573 current.x |= masked;
1574 }
1575
1576 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1577 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
1578 current.x |= value;
1579 *Pointer<Short4>(buffer) = current.x;
1580 }
1581
1582 {
1583 Short4 value = *Pointer<Short4>(buffer + 8);
1584
1585 if(rgbaWriteMask != 0x0000000F)
1586 {
1587 Short4 masked = value;
1588 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1589 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1590 current.y |= masked;
1591 }
1592
1593 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1594 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
1595 current.y |= value;
1596 *Pointer<Short4>(buffer + 8) = current.y;
1597 }
1598
1599 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1600
1601 {
1602 Short4 value = *Pointer<Short4>(buffer);
1603
1604 if(rgbaWriteMask != 0x0000000F)
1605 {
1606 Short4 masked = value;
1607 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1608 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1609 current.z |= masked;
1610 }
1611
1612 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1613 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
1614 current.z |= value;
1615 *Pointer<Short4>(buffer) = current.z;
1616 }
1617
1618 {
1619 Short4 value = *Pointer<Short4>(buffer + 8);
1620
1621 if(rgbaWriteMask != 0x0000000F)
1622 {
1623 Short4 masked = value;
1624 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1625 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1626 current.w |= masked;
1627 }
1628
1629 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1630 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
1631 current.w |= value;
1632 *Pointer<Short4>(buffer + 8) = current.w;
1633 }
1634 }
1635 break;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001636 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1637 {
1638 Pointer<Byte> buffer = cBuffer + 4 * x;
1639
1640 buffer = cBuffer + 4 * x;
1641 Int2 value = *Pointer<Int2>(buffer, 16);
1642 Int2 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
1643 if (rgbaWriteMask != 0xF)
1644 {
1645 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1646 }
1647 *Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
1648
1649 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1650
1651 value = *Pointer<Int2>(buffer, 16);
1652 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
1653 if (rgbaWriteMask != 0xF)
1654 {
1655 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1656 }
1657 *Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
1658 }
1659 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001660 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001661 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001662 }
1663 }
1664
Alexis Hetu4ad23222018-11-22 16:40:52 -05001665 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorActive)
Nicolas Capens68a82382018-10-02 13:16:55 -04001666 {
1667 switch(blendFactorActive)
1668 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001669 case VK_BLEND_FACTOR_ZERO:
Chris Forbes4d659342019-05-10 13:40:00 -07001670 blendFactor.x = Float4(0);
1671 blendFactor.y = Float4(0);
1672 blendFactor.z = Float4(0);
Nicolas Capens68a82382018-10-02 13:16:55 -04001673 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001674 case VK_BLEND_FACTOR_ONE:
Chris Forbes4d659342019-05-10 13:40:00 -07001675 blendFactor.x = Float4(1);
1676 blendFactor.y = Float4(1);
1677 blendFactor.z = Float4(1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001678 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001679 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001680 blendFactor.x = oC.x;
1681 blendFactor.y = oC.y;
1682 blendFactor.z = oC.z;
1683 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001684 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001685 blendFactor.x = Float4(1.0f) - oC.x;
1686 blendFactor.y = Float4(1.0f) - oC.y;
1687 blendFactor.z = Float4(1.0f) - oC.z;
1688 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001689 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001690 blendFactor.x = pixel.x;
1691 blendFactor.y = pixel.y;
1692 blendFactor.z = pixel.z;
1693 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001694 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001695 blendFactor.x = Float4(1.0f) - pixel.x;
1696 blendFactor.y = Float4(1.0f) - pixel.y;
1697 blendFactor.z = Float4(1.0f) - pixel.z;
1698 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001699 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001700 blendFactor.x = oC.w;
1701 blendFactor.y = oC.w;
1702 blendFactor.z = oC.w;
1703 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001704 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001705 blendFactor.x = Float4(1.0f) - oC.w;
1706 blendFactor.y = Float4(1.0f) - oC.w;
1707 blendFactor.z = Float4(1.0f) - oC.w;
1708 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001709 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001710 blendFactor.x = pixel.w;
1711 blendFactor.y = pixel.w;
1712 blendFactor.z = pixel.w;
1713 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001714 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001715 blendFactor.x = Float4(1.0f) - pixel.w;
1716 blendFactor.y = Float4(1.0f) - pixel.w;
1717 blendFactor.z = Float4(1.0f) - pixel.w;
1718 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001719 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -04001720 blendFactor.x = Float4(1.0f) - pixel.w;
1721 blendFactor.x = Min(blendFactor.x, oC.w);
1722 blendFactor.y = blendFactor.x;
1723 blendFactor.z = blendFactor.x;
1724 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001725 case VK_BLEND_FACTOR_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001726 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1727 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1728 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
1729 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01001730 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1731 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1732 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1733 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1734 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001735 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001736 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1737 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1738 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
1739 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01001740 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1741 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1742 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1743 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1744 break;
1745
Nicolas Capens68a82382018-10-02 13:16:55 -04001746 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001747 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorActive));
Nicolas Capens68a82382018-10-02 13:16:55 -04001748 }
1749 }
1750
Alexis Hetu4ad23222018-11-22 16:40:52 -05001751 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorAlphaActive)
Nicolas Capens68a82382018-10-02 13:16:55 -04001752 {
1753 switch(blendFactorAlphaActive)
1754 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001755 case VK_BLEND_FACTOR_ZERO:
Chris Forbes4d659342019-05-10 13:40:00 -07001756 blendFactor.w = Float4(0);
Nicolas Capens68a82382018-10-02 13:16:55 -04001757 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001758 case VK_BLEND_FACTOR_ONE:
Chris Forbes4d659342019-05-10 13:40:00 -07001759 blendFactor.w = Float4(1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001760 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001761 case VK_BLEND_FACTOR_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001762 blendFactor.w = oC.w;
1763 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001764 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001765 blendFactor.w = Float4(1.0f) - oC.w;
1766 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001767 case VK_BLEND_FACTOR_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001768 blendFactor.w = pixel.w;
1769 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001770 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
Nicolas Capens68a82382018-10-02 13:16:55 -04001771 blendFactor.w = Float4(1.0f) - pixel.w;
1772 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001773 case VK_BLEND_FACTOR_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001774 blendFactor.w = oC.w;
1775 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001776 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001777 blendFactor.w = Float4(1.0f) - oC.w;
1778 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001779 case VK_BLEND_FACTOR_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001780 blendFactor.w = pixel.w;
1781 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001782 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001783 blendFactor.w = Float4(1.0f) - pixel.w;
1784 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001785 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
Nicolas Capens68a82382018-10-02 13:16:55 -04001786 blendFactor.w = Float4(1.0f);
1787 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001788 case VK_BLEND_FACTOR_CONSTANT_COLOR:
Ben Clayton8ab40532019-05-10 16:23:13 +01001789 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001790 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1791 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001792 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
Ben Clayton8ab40532019-05-10 16:23:13 +01001793 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
Nicolas Capens68a82382018-10-02 13:16:55 -04001794 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
1795 break;
1796 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001797 UNIMPLEMENTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
Nicolas Capens68a82382018-10-02 13:16:55 -04001798 }
1799 }
1800
1801 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
1802 {
1803 if(!state.alphaBlendActive)
1804 {
1805 return;
1806 }
1807
1808 Pointer<Byte> buffer;
Ben Clayton8ab40532019-05-10 16:23:13 +01001809
1810 // pixel holds four texel color values.
1811 // Note: Despite the type being Vector4f, the colors may be stored as
1812 // integers. Half-floats are stored as full 32-bit floats.
1813 // Non-float and non-fixed point formats are not alpha blended.
Nicolas Capens68a82382018-10-02 13:16:55 -04001814 Vector4f pixel;
1815
1816 Vector4s color;
1817 Short4 c01;
1818 Short4 c23;
1819
1820 Float4 one;
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001821 vk::Format format(state.targetFormat[index]);
1822 if(format.isFloatFormat())
Nicolas Capens68a82382018-10-02 13:16:55 -04001823 {
1824 one = Float4(1.0f);
1825 }
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001826 else if(format.isNonNormalizedInteger())
Nicolas Capens68a82382018-10-02 13:16:55 -04001827 {
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001828 one = As<Float4>(format.isUnsignedComponent(0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Nicolas Capens68a82382018-10-02 13:16:55 -04001829 }
1830
1831 switch(state.targetFormat[index])
1832 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001833 case VK_FORMAT_R32_SINT:
1834 case VK_FORMAT_R32_UINT:
1835 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001836 buffer = cBuffer;
1837 // FIXME: movlps
1838 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
1839 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
1840 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1841 // FIXME: movhps
1842 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
1843 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
1844 pixel.y = pixel.z = pixel.w = one;
1845 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001846 case VK_FORMAT_R32G32_SINT:
1847 case VK_FORMAT_R32G32_UINT:
1848 case VK_FORMAT_R32G32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001849 buffer = cBuffer;
1850 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
1851 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1852 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
1853 pixel.z = pixel.x;
1854 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
1855 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
1856 pixel.y = pixel.z;
1857 pixel.z = pixel.w = one;
1858 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001859 case VK_FORMAT_R32G32B32A32_SFLOAT:
1860 case VK_FORMAT_R32G32B32A32_SINT:
1861 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001862 buffer = cBuffer;
1863 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
1864 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
1865 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1866 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
1867 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
1868 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Nicolas Capens68a82382018-10-02 13:16:55 -04001869 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01001870 case VK_FORMAT_R16_SFLOAT:
1871 buffer = cBuffer;
1872 pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
1873 pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
1874 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1875 pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
1876 pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
1877 pixel.y = pixel.z = pixel.w = one;
1878 break;
1879 case VK_FORMAT_R16G16_SFLOAT:
1880 buffer = cBuffer;
1881 pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
1882 pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
1883 pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
1884 pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
1885 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1886 pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
1887 pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
1888 pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
1889 pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
1890 pixel.z = pixel.w = one;
1891 break;
1892 case VK_FORMAT_R16G16B16A16_SFLOAT:
1893 buffer = cBuffer;
1894 pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
1895 pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
1896 pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
1897 pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
1898 pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
1899 pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
1900 pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
1901 pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
1902 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1903 pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
1904 pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
1905 pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
1906 pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
1907 pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
1908 pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
1909 pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
1910 pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
1911 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001912 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001913 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001914 }
1915
Alexis Hetu8af8b402019-05-28 14:48:19 -04001916 if(isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001917 {
1918 sRGBtoLinear(pixel.x);
1919 sRGBtoLinear(pixel.y);
1920 sRGBtoLinear(pixel.z);
1921 }
1922
1923 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1924 Vector4f sourceFactor;
1925 Vector4f destFactor;
1926
1927 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
1928 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
1929
Chris Forbes4d659342019-05-10 13:40:00 -07001930 oC.x *= sourceFactor.x;
1931 oC.y *= sourceFactor.y;
1932 oC.z *= sourceFactor.z;
Nicolas Capens68a82382018-10-02 13:16:55 -04001933
Chris Forbes4d659342019-05-10 13:40:00 -07001934 pixel.x *= destFactor.x;
1935 pixel.y *= destFactor.y;
1936 pixel.z *= destFactor.z;
Nicolas Capens68a82382018-10-02 13:16:55 -04001937
1938 switch(state.blendOperation)
1939 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001940 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001941 oC.x += pixel.x;
1942 oC.y += pixel.y;
1943 oC.z += pixel.z;
1944 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001945 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001946 oC.x -= pixel.x;
1947 oC.y -= pixel.y;
1948 oC.z -= pixel.z;
1949 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001950 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001951 oC.x = pixel.x - oC.x;
1952 oC.y = pixel.y - oC.y;
1953 oC.z = pixel.z - oC.z;
1954 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001955 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04001956 oC.x = Min(oC.x, pixel.x);
1957 oC.y = Min(oC.y, pixel.y);
1958 oC.z = Min(oC.z, pixel.z);
1959 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001960 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04001961 oC.x = Max(oC.x, pixel.x);
1962 oC.y = Max(oC.y, pixel.y);
1963 oC.z = Max(oC.z, pixel.z);
1964 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001965 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001966 // No operation
1967 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001968 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001969 oC.x = pixel.x;
1970 oC.y = pixel.y;
1971 oC.z = pixel.z;
1972 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001973 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001974 oC.x = Float4(0.0f);
1975 oC.y = Float4(0.0f);
1976 oC.z = Float4(0.0f);
1977 break;
1978 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01001979 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperation));
Nicolas Capens68a82382018-10-02 13:16:55 -04001980 }
1981
1982 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
1983 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
1984
Chris Forbes4d659342019-05-10 13:40:00 -07001985 oC.w *= sourceFactor.w;
1986 pixel.w *= destFactor.w;
Nicolas Capens68a82382018-10-02 13:16:55 -04001987
1988 switch(state.blendOperationAlpha)
1989 {
Alexis Hetu4ad23222018-11-22 16:40:52 -05001990 case VK_BLEND_OP_ADD:
Nicolas Capens68a82382018-10-02 13:16:55 -04001991 oC.w += pixel.w;
1992 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001993 case VK_BLEND_OP_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001994 oC.w -= pixel.w;
1995 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05001996 case VK_BLEND_OP_REVERSE_SUBTRACT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001997 pixel.w -= oC.w;
1998 oC.w = pixel.w;
1999 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002000 case VK_BLEND_OP_MIN:
Nicolas Capens68a82382018-10-02 13:16:55 -04002001 oC.w = Min(oC.w, pixel.w);
2002 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002003 case VK_BLEND_OP_MAX:
Nicolas Capens68a82382018-10-02 13:16:55 -04002004 oC.w = Max(oC.w, pixel.w);
2005 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002006 case VK_BLEND_OP_SRC_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002007 // No operation
2008 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002009 case VK_BLEND_OP_DST_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002010 oC.w = pixel.w;
2011 break;
Alexis Hetu4ad23222018-11-22 16:40:52 -05002012 case VK_BLEND_OP_ZERO_EXT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002013 oC.w = Float4(0.0f);
2014 break;
2015 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01002016 UNIMPLEMENTED("VkBlendOp: %d", int(state.blendOperationAlpha));
Nicolas Capens68a82382018-10-02 13:16:55 -04002017 }
2018 }
2019
2020 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
2021 {
2022 switch(state.targetFormat[index])
2023 {
Ben Clayton8ab40532019-05-10 16:23:13 +01002024 case VK_FORMAT_R16_SFLOAT:
Alexis Hetudd152e12018-11-14 13:39:28 -05002025 case VK_FORMAT_R32_SFLOAT:
2026 case VK_FORMAT_R32_SINT:
2027 case VK_FORMAT_R32_UINT:
2028 case VK_FORMAT_R16_SINT:
2029 case VK_FORMAT_R16_UINT:
2030 case VK_FORMAT_R8_SINT:
2031 case VK_FORMAT_R8_UINT:
Chris Forbesb0f37162019-05-03 07:25:58 -07002032 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04002033 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002034 case VK_FORMAT_R16G16_SFLOAT:
Alexis Hetudd152e12018-11-14 13:39:28 -05002035 case VK_FORMAT_R32G32_SFLOAT:
2036 case VK_FORMAT_R32G32_SINT:
2037 case VK_FORMAT_R32G32_UINT:
2038 case VK_FORMAT_R16G16_SINT:
2039 case VK_FORMAT_R16G16_UINT:
2040 case VK_FORMAT_R8G8_SINT:
2041 case VK_FORMAT_R8G8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002042 oC.z = oC.x;
2043 oC.x = UnpackLow(oC.x, oC.y);
2044 oC.z = UnpackHigh(oC.z, oC.y);
2045 oC.y = oC.z;
2046 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002047 case VK_FORMAT_R16G16B16A16_SFLOAT:
Alexis Hetudd152e12018-11-14 13:39:28 -05002048 case VK_FORMAT_R32G32B32A32_SFLOAT:
2049 case VK_FORMAT_R32G32B32A32_SINT:
2050 case VK_FORMAT_R32G32B32A32_UINT:
2051 case VK_FORMAT_R16G16B16A16_SINT:
2052 case VK_FORMAT_R16G16B16A16_UINT:
2053 case VK_FORMAT_R8G8B8A8_SINT:
2054 case VK_FORMAT_R8G8B8A8_UINT:
Chris Forbes6407c1a2019-04-15 17:22:57 -07002055 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2056 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04002057 transpose4x4(oC.x, oC.y, oC.z, oC.w);
2058 break;
2059 default:
Ben Clayton3bb94902019-04-07 13:10:54 +01002060 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04002061 }
2062
2063 int rgbaWriteMask = state.colorWriteActive(index);
2064
2065 Int xMask; // Combination of all masks
2066
2067 if(state.depthTestActive)
2068 {
2069 xMask = zMask;
2070 }
2071 else
2072 {
2073 xMask = cMask;
2074 }
2075
2076 if(state.stencilActive)
2077 {
2078 xMask &= sMask;
2079 }
2080
Ben Clayton8ab40532019-05-10 16:23:13 +01002081 auto targetFormat = state.targetFormat[index];
2082
Nicolas Capens68a82382018-10-02 13:16:55 -04002083 Pointer<Byte> buffer;
2084 Float4 value;
2085
Ben Clayton8ab40532019-05-10 16:23:13 +01002086 switch(targetFormat)
Nicolas Capens68a82382018-10-02 13:16:55 -04002087 {
Alexis Hetudd152e12018-11-14 13:39:28 -05002088 case VK_FORMAT_R32_SFLOAT:
2089 case VK_FORMAT_R32_SINT:
2090 case VK_FORMAT_R32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002091 if(rgbaWriteMask & 0x00000001)
2092 {
2093 buffer = cBuffer + 4 * x;
2094
2095 // FIXME: movlps
2096 value.x = *Pointer<Float>(buffer + 0);
2097 value.y = *Pointer<Float>(buffer + 4);
2098
2099 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2100
2101 // FIXME: movhps
2102 value.z = *Pointer<Float>(buffer + 0);
2103 value.w = *Pointer<Float>(buffer + 4);
2104
2105 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2106 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
2107 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2108
2109 // FIXME: movhps
2110 *Pointer<Float>(buffer + 0) = oC.x.z;
2111 *Pointer<Float>(buffer + 4) = oC.x.w;
2112
2113 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2114
2115 // FIXME: movlps
2116 *Pointer<Float>(buffer + 0) = oC.x.x;
2117 *Pointer<Float>(buffer + 4) = oC.x.y;
2118 }
2119 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002120 case VK_FORMAT_R16_SFLOAT:
2121 if(rgbaWriteMask & 0x00000001)
2122 {
2123 buffer = cBuffer + 2 * x;
2124
2125 value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
2126 value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
2127
2128 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2129
2130 value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
2131 value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
2132
2133 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2134 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2135 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2136
2137 *Pointer<Half>(buffer + 0) = Half(oC.x.z);
2138 *Pointer<Half>(buffer + 2) = Half(oC.x.w);
2139
2140 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2141
2142 *Pointer<Half>(buffer + 0) = Half(oC.x.x);
2143 *Pointer<Half>(buffer + 2) = Half(oC.x.y);
2144 }
2145 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002146 case VK_FORMAT_R16_SINT:
2147 case VK_FORMAT_R16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002148 if(rgbaWriteMask & 0x00000001)
2149 {
2150 buffer = cBuffer + 2 * x;
2151
2152 UShort4 xyzw;
2153 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2154
2155 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2156
2157 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2158 value = As<Float4>(Int4(xyzw));
2159
2160 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2161 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2162 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2163
Ben Clayton8ab40532019-05-10 16:23:13 +01002164 if(targetFormat == VK_FORMAT_R16_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04002165 {
2166 Float component = oC.x.z;
2167 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2168 component = oC.x.w;
2169 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2170
2171 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2172
2173 component = oC.x.x;
2174 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2175 component = oC.x.y;
2176 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2177 }
Alexis Hetudd152e12018-11-14 13:39:28 -05002178 else // VK_FORMAT_R16_UINT
Nicolas Capens68a82382018-10-02 13:16:55 -04002179 {
2180 Float component = oC.x.z;
2181 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2182 component = oC.x.w;
2183 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2184
2185 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2186
2187 component = oC.x.x;
2188 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2189 component = oC.x.y;
2190 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2191 }
2192 }
2193 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002194 case VK_FORMAT_R8_SINT:
2195 case VK_FORMAT_R8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002196 if(rgbaWriteMask & 0x00000001)
2197 {
2198 buffer = cBuffer + x;
2199
2200 UInt xyzw, packedCol;
2201
2202 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
2203 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2204 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
2205
2206 Short4 tmpCol = Short4(As<Int4>(oC.x));
Ben Clayton8ab40532019-05-10 16:23:13 +01002207 if(targetFormat == VK_FORMAT_R8_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04002208 {
2209 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
2210 }
2211 else
2212 {
2213 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
2214 }
2215 packedCol = Extract(As<Int2>(tmpCol), 0);
2216
2217 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2218 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2219
2220 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2221 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2222 *Pointer<UShort>(buffer) = UShort(packedCol);
2223 }
2224 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002225 case VK_FORMAT_R32G32_SFLOAT:
2226 case VK_FORMAT_R32G32_SINT:
2227 case VK_FORMAT_R32G32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002228 buffer = cBuffer + 8 * x;
2229
2230 value = *Pointer<Float4>(buffer);
2231
2232 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2233 {
2234 Float4 masked = value;
2235 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2236 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
2237 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2238 }
2239
2240 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2241 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
2242 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2243 *Pointer<Float4>(buffer) = oC.x;
2244
2245 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2246
2247 value = *Pointer<Float4>(buffer);
2248
2249 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2250 {
2251 Float4 masked;
2252
2253 masked = value;
2254 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2255 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
2256 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2257 }
2258
2259 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2260 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
2261 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2262 *Pointer<Float4>(buffer) = oC.y;
2263 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002264 case VK_FORMAT_R16G16_SFLOAT:
2265 if((rgbaWriteMask & 0x00000003) != 0x0)
2266 {
2267 buffer = cBuffer + 4 * x;
2268
2269 UInt2 rgbaMask;
2270 UInt2 packedCol;
2271 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
2272 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
2273
2274 UShort4 value = *Pointer<UShort4>(buffer);
2275 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2276 if((rgbaWriteMask & 0x3) != 0x3)
2277 {
2278 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2279 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2280 mergedMask &= rgbaMask;
2281 }
2282 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2283
2284 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2285
2286 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
2287 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
2288 value = *Pointer<UShort4>(buffer);
2289 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2290 if((rgbaWriteMask & 0x3) != 0x3)
2291 {
2292 mergedMask &= rgbaMask;
2293 }
2294 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2295 }
2296 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002297 case VK_FORMAT_R16G16_SINT:
2298 case VK_FORMAT_R16G16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002299 if((rgbaWriteMask & 0x00000003) != 0x0)
2300 {
2301 buffer = cBuffer + 4 * x;
2302
2303 UInt2 rgbaMask;
2304 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2305 UShort4 value = *Pointer<UShort4>(buffer);
2306 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2307 if((rgbaWriteMask & 0x3) != 0x3)
2308 {
2309 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2310 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2311 mergedMask &= rgbaMask;
2312 }
2313 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2314
2315 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2316
2317 packedCol = UShort4(As<Int4>(oC.y));
2318 value = *Pointer<UShort4>(buffer);
2319 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2320 if((rgbaWriteMask & 0x3) != 0x3)
2321 {
2322 mergedMask &= rgbaMask;
2323 }
2324 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2325 }
2326 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002327 case VK_FORMAT_R8G8_SINT:
2328 case VK_FORMAT_R8G8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002329 if((rgbaWriteMask & 0x00000003) != 0x0)
2330 {
2331 buffer = cBuffer + 2 * x;
2332
2333 Int2 xyzw, packedCol;
2334
2335 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2336 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2337 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2338
Ben Clayton8ab40532019-05-10 16:23:13 +01002339 if(targetFormat == VK_FORMAT_R8G8_SINT)
Nicolas Capens68a82382018-10-02 13:16:55 -04002340 {
2341 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2342 }
2343 else
2344 {
2345 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2346 }
2347
2348 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2349 if((rgbaWriteMask & 0x3) != 0x3)
2350 {
2351 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2352 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2353 mergedMask &= rgbaMask;
2354 }
2355
2356 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2357
2358 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2359 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2360 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2361 }
2362 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002363 case VK_FORMAT_R32G32B32A32_SFLOAT:
2364 case VK_FORMAT_R32G32B32A32_SINT:
2365 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002366 buffer = cBuffer + 16 * x;
2367
2368 {
2369 value = *Pointer<Float4>(buffer, 16);
2370
2371 if(rgbaWriteMask != 0x0000000F)
2372 {
2373 Float4 masked = value;
2374 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2375 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2376 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2377 }
2378
2379 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2380 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
2381 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2382 *Pointer<Float4>(buffer, 16) = oC.x;
2383 }
2384
2385 {
2386 value = *Pointer<Float4>(buffer + 16, 16);
2387
2388 if(rgbaWriteMask != 0x0000000F)
2389 {
2390 Float4 masked = value;
2391 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2392 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2393 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2394 }
2395
2396 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2397 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
2398 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2399 *Pointer<Float4>(buffer + 16, 16) = oC.y;
2400 }
2401
2402 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2403
2404 {
2405 value = *Pointer<Float4>(buffer, 16);
2406
2407 if(rgbaWriteMask != 0x0000000F)
2408 {
2409 Float4 masked = value;
2410 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2411 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2412 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
2413 }
2414
2415 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2416 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
2417 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2418 *Pointer<Float4>(buffer, 16) = oC.z;
2419 }
2420
2421 {
2422 value = *Pointer<Float4>(buffer + 16, 16);
2423
2424 if(rgbaWriteMask != 0x0000000F)
2425 {
2426 Float4 masked = value;
2427 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2428 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2429 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
2430 }
2431
2432 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2433 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
2434 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2435 *Pointer<Float4>(buffer + 16, 16) = oC.w;
2436 }
2437 break;
Ben Clayton8ab40532019-05-10 16:23:13 +01002438 case VK_FORMAT_R16G16B16A16_SFLOAT:
2439 if((rgbaWriteMask & 0x0000000F) != 0x0)
2440 {
2441 buffer = cBuffer + 8 * x;
2442
2443 UInt4 rgbaMask;
2444 UInt4 value = *Pointer<UInt4>(buffer);
2445 UInt4 packedCol;
2446 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
2447 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
2448 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
2449 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
2450 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2451 if((rgbaWriteMask & 0xF) != 0xF)
2452 {
2453 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2454 rgbaMask = UInt4(tmpMask, tmpMask);
2455 mergedMask &= rgbaMask;
2456 }
2457 *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2458
2459 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2460
2461 value = *Pointer<UInt4>(buffer);
2462 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
2463 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
2464 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
2465 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
2466 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2467 if((rgbaWriteMask & 0xF) != 0xF)
2468 {
2469 mergedMask &= rgbaMask;
2470 }
2471 *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2472 }
2473 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002474 case VK_FORMAT_R16G16B16A16_SINT:
2475 case VK_FORMAT_R16G16B16A16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04002476 if((rgbaWriteMask & 0x0000000F) != 0x0)
2477 {
2478 buffer = cBuffer + 8 * x;
2479
2480 UInt4 rgbaMask;
2481 UShort8 value = *Pointer<UShort8>(buffer);
2482 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2483 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2484 if((rgbaWriteMask & 0xF) != 0xF)
2485 {
2486 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2487 rgbaMask = UInt4(tmpMask, tmpMask);
2488 mergedMask &= rgbaMask;
2489 }
2490 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2491
2492 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2493
2494 value = *Pointer<UShort8>(buffer);
2495 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2496 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2497 if((rgbaWriteMask & 0xF) != 0xF)
2498 {
2499 mergedMask &= rgbaMask;
2500 }
2501 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2502 }
2503 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05002504 case VK_FORMAT_R8G8B8A8_SINT:
2505 case VK_FORMAT_R8G8B8A8_UINT:
Chris Forbes6407c1a2019-04-15 17:22:57 -07002506 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2507 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04002508 if((rgbaWriteMask & 0x0000000F) != 0x0)
2509 {
2510 UInt2 value, packedCol, mergedMask;
2511
2512 buffer = cBuffer + 4 * x;
2513
Ben Clayton8ab40532019-05-10 16:23:13 +01002514 bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
Chris Forbes6407c1a2019-04-15 17:22:57 -07002515
2516 if(isSigned)
Nicolas Capens68a82382018-10-02 13:16:55 -04002517 {
2518 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2519 }
2520 else
2521 {
2522 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2523 }
2524 value = *Pointer<UInt2>(buffer, 16);
2525 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2526 if(rgbaWriteMask != 0xF)
2527 {
2528 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2529 }
2530 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2531
2532 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2533
Chris Forbes6407c1a2019-04-15 17:22:57 -07002534 if(isSigned)
Nicolas Capens68a82382018-10-02 13:16:55 -04002535 {
2536 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2537 }
2538 else
2539 {
2540 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2541 }
2542 value = *Pointer<UInt2>(buffer, 16);
2543 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2544 if(rgbaWriteMask != 0xF)
2545 {
2546 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2547 }
2548 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2549 }
2550 break;
Chris Forbesb0f37162019-05-03 07:25:58 -07002551 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
2552 if ((rgbaWriteMask & 0x0000000F) != 0x0)
2553 {
2554 Int2 mergedMask, packedCol, value;
2555 Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
2556 ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
2557 ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
2558 ((As<Int4>(oC.x) & Int4(0x3ff)));
2559
2560 buffer = cBuffer + 4 * x;
2561 value = *Pointer<Int2>(buffer, 16);
2562 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2563 if (rgbaWriteMask != 0xF)
2564 {
2565 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
2566 }
2567 *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
2568
2569 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2570
2571 value = *Pointer<Int2>(buffer, 16);
2572 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2573 if (rgbaWriteMask != 0xF)
2574 {
2575 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
2576 }
2577 *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
2578 }
2579 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04002580 default:
Ben Clayton8ab40532019-05-10 16:23:13 +01002581 UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
Nicolas Capens68a82382018-10-02 13:16:55 -04002582 }
2583 }
2584
2585 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2586 {
2587 return UShort4(cf * Float4(0xFFFF), saturate);
2588 }
2589
2590 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
2591 {
2592 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
2593
2594 c.x = As<UShort4>(c.x) >> 4;
2595 c.y = As<UShort4>(c.y) >> 4;
2596 c.z = As<UShort4>(c.z) >> 4;
2597
2598 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2599 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2600 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2601 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2602
2603 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2604 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2605 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2606 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2607
2608 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2609 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2610 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2611 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2612 }
2613
2614 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
2615 {
2616 c.x = As<UShort4>(c.x) >> 4;
2617 c.y = As<UShort4>(c.y) >> 4;
2618 c.z = As<UShort4>(c.z) >> 4;
2619
2620 linearToSRGB12_16(c);
2621 }
2622
2623 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
2624 {
2625 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
2626
2627 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2628 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2629 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2630 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2631
2632 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2633 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2634 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2635 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2636
2637 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2638 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2639 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2640 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2641 }
2642
2643 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2644 {
2645 Float4 linear = x * x;
2646 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2647
2648 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2649 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002650}