blob: e6c6fb9dacb9e59589074610a7d63a25255dde46 [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2012 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "VertexRoutine.hpp"
13
14#include "VertexShader.hpp"
15#include "Vertex.hpp"
16#include "Half.hpp"
17#include "Renderer.hpp"
18#include "Constants.hpp"
19#include "Debug.hpp"
20
21namespace sw
22{
// Global configuration flags; only declared here, their definitions are not part of this file section.
extern bool symmetricNormalizedDepth;   // Normalized depth range is [-1, 1] instead of [0, 1]
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
25
26 VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : state(state), shader(shader)
John Bauman89401822014-05-06 15:04:28 -040027 {
28 routine = 0;
29 }
30
31 VertexRoutine::~VertexRoutine()
32 {
33 }
34
35 void VertexRoutine::generate()
36 {
John Bauman66b8ab22014-05-06 15:57:45 -040037 Function<Void, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte> > function;
John Bauman89401822014-05-06 15:04:28 -040038 {
39 Pointer<Byte> vertex(function.arg(0));
40 Pointer<Byte> batch(function.arg(1));
41 Pointer<Byte> task(function.arg(2));
42 Pointer<Byte> data(function.arg(3));
43
44 const bool texldl = state.shaderContainsTexldl;
45
46 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
47 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
48 Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
49
Nicolas Capensc50d35d2015-01-27 01:52:41 -050050 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
John Bauman89401822014-05-06 15:04:28 -040051
John Bauman19bac1e2014-05-06 15:23:49 -040052 Registers r(shader);
John Bauman89401822014-05-06 15:04:28 -040053 r.data = data;
John Bauman66b8ab22014-05-06 15:57:45 -040054 r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
Nicolas Capenscce84632015-06-10 16:09:20 -040055 if(shader && shader->instanceIdDeclared)
Alexis Hetudd8df682015-06-05 17:08:39 -040056 {
57 r.instanceID = *Pointer<Int>(data + OFFSET(DrawData, instanceID));
58 }
John Bauman89401822014-05-06 15:04:28 -040059
60 Do
61 {
62 UInt index = *Pointer<UInt>(batch);
John Bauman66b8ab22014-05-06 15:57:45 -040063 UInt tagIndex = index & 0x0000003C;
64 UInt indexQ = !texldl ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance.
John Bauman89401822014-05-06 15:04:28 -040065
66 If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
67 {
68 *Pointer<UInt>(tagCache + tagIndex) = indexQ;
69
70 readInput(r, indexQ);
71 pipeline(r);
72 postTransform(r);
73 computeClipFlags(r);
74
75 Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex));
76 writeCache(cacheLine0, r);
77 }
78
John Bauman66b8ab22014-05-06 15:57:45 -040079 UInt cacheIndex = index & 0x0000003F;
John Bauman89401822014-05-06 15:04:28 -040080 Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
81 writeVertex(vertex, cacheLine);
82
83 vertex += sizeof(Vertex);
84 batch += sizeof(unsigned int);
Nicolas Capensc50d35d2015-01-27 01:52:41 -050085 vertexCount--;
John Bauman89401822014-05-06 15:04:28 -040086 }
Nicolas Capensc50d35d2015-01-27 01:52:41 -050087 Until(vertexCount == 0)
John Bauman89401822014-05-06 15:04:28 -040088
89 Return();
90 }
91
John Bauman19bac1e2014-05-06 15:23:49 -040092 routine = function(L"VertexRoutine_%0.8X", state.shaderID);
John Bauman89401822014-05-06 15:04:28 -040093 }
94
95 Routine *VertexRoutine::getRoutine()
96 {
97 return routine;
98 }
99
100 void VertexRoutine::readInput(Registers &r, UInt &index)
101 {
Nicolas Capens0f250902015-06-25 15:25:29 -0400102 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
John Bauman89401822014-05-06 15:04:28 -0400103 {
John Bauman66b8ab22014-05-06 15:57:45 -0400104 Pointer<Byte> input = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,input) + sizeof(void*) * i);
John Bauman89401822014-05-06 15:04:28 -0400105 UInt stride = *Pointer<UInt>(r.data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i);
106
107 r.v[i] = readStream(r, input, stride, state.input[i], index);
108 }
109 }
110
111 void VertexRoutine::computeClipFlags(Registers &r)
112 {
113 int pos = state.positionRegister;
114
115 // Backtransform
116 if(state.preTransformed)
117 {
John Bauman19bac1e2014-05-06 15:23:49 -0400118 Float4 rhw = Float4(1.0f) / r.o[pos].w;
John Bauman89401822014-05-06 15:04:28 -0400119
John Bauman19bac1e2014-05-06 15:23:49 -0400120 Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f);
121 Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f);
122 Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f);
123 Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f);
John Bauman89401822014-05-06 15:04:28 -0400124
John Bauman19bac1e2014-05-06 15:23:49 -0400125 r.o[pos].x = (r.o[pos].x - L) / W * rhw;
126 r.o[pos].y = (r.o[pos].y - T) / H * rhw;
127 r.o[pos].z = r.o[pos].z * rhw;
128 r.o[pos].w = rhw;
John Bauman89401822014-05-06 15:04:28 -0400129 }
130
131 if(state.superSampling)
132 {
John Bauman19bac1e2014-05-06 15:23:49 -0400133 r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w;
134 r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w;
John Bauman89401822014-05-06 15:04:28 -0400135 }
136
Nicolas Capens5491cb42015-07-02 15:33:29 -0400137 Int4 maxX = CmpLT(r.o[pos].w, r.o[pos].x);
138 Int4 maxY = CmpLT(r.o[pos].w, r.o[pos].y);
John Bauman19bac1e2014-05-06 15:23:49 -0400139 Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z);
John Bauman89401822014-05-06 15:04:28 -0400140
Nicolas Capens5491cb42015-07-02 15:33:29 -0400141 Int4 minX = CmpNLE(-r.o[pos].w, r.o[pos].x);
142 Int4 minY = CmpNLE(-r.o[pos].w, r.o[pos].y);
John Bauman19bac1e2014-05-06 15:23:49 -0400143 Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z);
John Bauman89401822014-05-06 15:04:28 -0400144
145 Int flags;
146
147 flags = SignMask(maxX);
148 r.clipFlags = *Pointer<Int>(r.constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
149 flags = SignMask(maxY);
150 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxY) + flags * 4);
151 flags = SignMask(maxZ);
152 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxZ) + flags * 4);
153 flags = SignMask(minX);
154 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minX) + flags * 4);
155 flags = SignMask(minY);
156 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minY) + flags * 4);
157 flags = SignMask(minZ);
158 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minZ) + flags * 4);
159
John Bauman19bac1e2014-05-06 15:23:49 -0400160 Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
161 Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
162 Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
John Bauman89401822014-05-06 15:04:28 -0400163
164 flags = SignMask(finiteX & finiteY & finiteZ);
165 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,fini) + flags * 4);
166
167 if(state.preTransformed)
168 {
169 r.clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane
170 }
171 }
172
John Bauman19bac1e2014-05-06 15:23:49 -0400173 Vector4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
John Bauman89401822014-05-06 15:04:28 -0400174 {
175 const bool texldl = state.shaderContainsTexldl;
176
John Bauman19bac1e2014-05-06 15:23:49 -0400177 Vector4f v;
John Bauman89401822014-05-06 15:04:28 -0400178
179 Pointer<Byte> source0 = buffer + index * stride;
180 Pointer<Byte> source1 = source0 + (!texldl ? stride : 0);
181 Pointer<Byte> source2 = source1 + (!texldl ? stride : 0);
182 Pointer<Byte> source3 = source2 + (!texldl ? stride : 0);
183
184 switch(stream.type)
185 {
186 case STREAMTYPE_FLOAT:
187 {
188 if(stream.count == 0)
189 {
190 // Null stream, all default components
191 }
192 else if(stream.count == 1)
193 {
194 v.x.x = *Pointer<Float>(source0);
195 v.x.y = *Pointer<Float>(source1);
196 v.x.z = *Pointer<Float>(source2);
197 v.x.w = *Pointer<Float>(source3);
198 }
199 else
200 {
201 v.x = *Pointer<Float4>(source0);
202 v.y = *Pointer<Float4>(source1);
203 v.z = *Pointer<Float4>(source2);
204 v.w = *Pointer<Float4>(source3);
205
206 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
207 }
208 }
209 break;
210 case STREAMTYPE_BYTE:
211 {
212 v.x = Float4(*Pointer<Byte4>(source0));
213 v.y = Float4(*Pointer<Byte4>(source1));
214 v.z = Float4(*Pointer<Byte4>(source2));
215 v.w = Float4(*Pointer<Byte4>(source3));
216
217 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
218
219 if(stream.normalized)
220 {
221 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
222 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
223 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
224 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
225 }
226 }
227 break;
228 case STREAMTYPE_SBYTE:
229 {
230 v.x = Float4(*Pointer<SByte4>(source0));
231 v.y = Float4(*Pointer<SByte4>(source1));
232 v.z = Float4(*Pointer<SByte4>(source2));
233 v.w = Float4(*Pointer<SByte4>(source3));
234
235 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
236
237 if(stream.normalized)
238 {
239 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
240 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
241 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
242 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
243 }
244 }
245 break;
246 case STREAMTYPE_COLOR:
247 {
248 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
249 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
250 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
251 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
252
253 transpose4x4(v.x, v.y, v.z, v.w);
254
255 // Swap red and blue
256 Float4 t = v.x;
257 v.x = v.z;
258 v.z = t;
259 }
260 break;
261 case STREAMTYPE_SHORT:
262 {
263 v.x = Float4(*Pointer<Short4>(source0));
264 v.y = Float4(*Pointer<Short4>(source1));
265 v.z = Float4(*Pointer<Short4>(source2));
266 v.w = Float4(*Pointer<Short4>(source3));
267
268 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
269
270 if(stream.normalized)
271 {
272 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
273 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
274 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
275 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
276 }
277 }
278 break;
279 case STREAMTYPE_USHORT:
280 {
281 v.x = Float4(*Pointer<UShort4>(source0));
282 v.y = Float4(*Pointer<UShort4>(source1));
283 v.z = Float4(*Pointer<UShort4>(source2));
284 v.w = Float4(*Pointer<UShort4>(source3));
285
286 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
287
288 if(stream.normalized)
289 {
290 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
291 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
292 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
293 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
294 }
295 }
296 break;
297 case STREAMTYPE_UDEC3:
298 {
299 // FIXME: Vectorize
300 {
301 Int x, y, z;
302
303 x = y = z = *Pointer<Int>(source0);
304
305 v.x.x = Float(x & 0x000003FF);
306 v.x.y = Float(y & 0x000FFC00);
307 v.x.z = Float(z & 0x3FF00000);
308 }
309
310 {
311 Int x, y, z;
312
313 x = y = z = *Pointer<Int>(source1);
314
315 v.y.x = Float(x & 0x000003FF);
316 v.y.y = Float(y & 0x000FFC00);
317 v.y.z = Float(z & 0x3FF00000);
318 }
319
320 {
321 Int x, y, z;
322
323 x = y = z = *Pointer<Int>(source2);
324
325 v.z.x = Float(x & 0x000003FF);
326 v.z.y = Float(y & 0x000FFC00);
327 v.z.z = Float(z & 0x3FF00000);
328 }
329
330 {
331 Int x, y, z;
332
333 x = y = z = *Pointer<Int>(source3);
334
335 v.w.x = Float(x & 0x000003FF);
336 v.w.y = Float(y & 0x000FFC00);
337 v.w.z = Float(z & 0x3FF00000);
338 }
339
340 transpose4x3(v.x, v.y, v.z, v.w);
341
John Bauman19bac1e2014-05-06 15:23:49 -0400342 v.y *= Float4(1.0f / 0x00000400);
343 v.z *= Float4(1.0f / 0x00100000);
John Bauman89401822014-05-06 15:04:28 -0400344 }
345 break;
346 case STREAMTYPE_DEC3N:
347 {
348 // FIXME: Vectorize
349 {
350 Int x, y, z;
351
352 x = y = z = *Pointer<Int>(source0);
353
354 v.x.x = Float((x << 22) & 0xFFC00000);
355 v.x.y = Float((y << 12) & 0xFFC00000);
356 v.x.z = Float((z << 2) & 0xFFC00000);
357 }
358
359 {
360 Int x, y, z;
361
362 x = y = z = *Pointer<Int>(source1);
363
364 v.y.x = Float((x << 22) & 0xFFC00000);
365 v.y.y = Float((y << 12) & 0xFFC00000);
366 v.y.z = Float((z << 2) & 0xFFC00000);
367 }
368
369 {
370 Int x, y, z;
371
372 x = y = z = *Pointer<Int>(source2);
373
374 v.z.x = Float((x << 22) & 0xFFC00000);
375 v.z.y = Float((y << 12) & 0xFFC00000);
376 v.z.z = Float((z << 2) & 0xFFC00000);
377 }
378
379 {
380 Int x, y, z;
381
382 x = y = z = *Pointer<Int>(source3);
383
384 v.w.x = Float((x << 22) & 0xFFC00000);
385 v.w.y = Float((y << 12) & 0xFFC00000);
386 v.w.z = Float((z << 2) & 0xFFC00000);
387 }
388
389 transpose4x3(v.x, v.y, v.z, v.w);
390
John Bauman19bac1e2014-05-06 15:23:49 -0400391 v.x *= Float4(1.0f / 0x00400000 / 511.0f);
392 v.y *= Float4(1.0f / 0x00400000 / 511.0f);
393 v.z *= Float4(1.0f / 0x00400000 / 511.0f);
John Bauman89401822014-05-06 15:04:28 -0400394 }
395 break;
396 case STREAMTYPE_FIXED:
397 {
398 v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
399 v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
400 v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
401 v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
402
403 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
404 }
405 break;
406 case STREAMTYPE_HALF:
407 {
408 if(stream.count >= 1)
409 {
410 UShort x0 = *Pointer<UShort>(source0 + 0);
411 UShort x1 = *Pointer<UShort>(source1 + 0);
412 UShort x2 = *Pointer<UShort>(source2 + 0);
413 UShort x3 = *Pointer<UShort>(source3 + 0);
414
415 v.x.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x0) * 4);
416 v.x.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x1) * 4);
417 v.x.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x2) * 4);
418 v.x.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x3) * 4);
419 }
420
421 if(stream.count >= 2)
422 {
423 UShort y0 = *Pointer<UShort>(source0 + 2);
424 UShort y1 = *Pointer<UShort>(source1 + 2);
425 UShort y2 = *Pointer<UShort>(source2 + 2);
426 UShort y3 = *Pointer<UShort>(source3 + 2);
427
428 v.y.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y0) * 4);
429 v.y.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y1) * 4);
430 v.y.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y2) * 4);
431 v.y.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y3) * 4);
432 }
433
434 if(stream.count >= 3)
435 {
436 UShort z0 = *Pointer<UShort>(source0 + 4);
437 UShort z1 = *Pointer<UShort>(source1 + 4);
438 UShort z2 = *Pointer<UShort>(source2 + 4);
439 UShort z3 = *Pointer<UShort>(source3 + 4);
440
441 v.z.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z0) * 4);
442 v.z.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z1) * 4);
443 v.z.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z2) * 4);
444 v.z.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z3) * 4);
445 }
446
447 if(stream.count >= 4)
448 {
449 UShort w0 = *Pointer<UShort>(source0 + 6);
450 UShort w1 = *Pointer<UShort>(source1 + 6);
451 UShort w2 = *Pointer<UShort>(source2 + 6);
452 UShort w3 = *Pointer<UShort>(source3 + 6);
453
454 v.w.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w0) * 4);
455 v.w.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w1) * 4);
456 v.w.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w2) * 4);
457 v.w.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w3) * 4);
458 }
459 }
460 break;
461 case STREAMTYPE_INDICES:
462 {
463 v.x.x = *Pointer<Float>(source0);
464 v.x.y = *Pointer<Float>(source1);
465 v.x.z = *Pointer<Float>(source2);
466 v.x.w = *Pointer<Float>(source3);
467 }
468 break;
469 default:
470 ASSERT(false);
471 }
472
John Bauman19bac1e2014-05-06 15:23:49 -0400473 if(stream.count < 1) v.x = Float4(0.0f);
474 if(stream.count < 2) v.y = Float4(0.0f);
475 if(stream.count < 3) v.z = Float4(0.0f);
476 if(stream.count < 4) v.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -0400477
478 return v;
479 }
480
481 void VertexRoutine::postTransform(Registers &r)
482 {
483 int pos = state.positionRegister;
484
Nicolas Capens5491cb42015-07-02 15:33:29 -0400485 if(!halfIntegerCoordinates)
John Bauman89401822014-05-06 15:04:28 -0400486 {
Nicolas Capens5491cb42015-07-02 15:33:29 -0400487 r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w;
488 r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w;
John Bauman19bac1e2014-05-06 15:23:49 -0400489 }
John Bauman89401822014-05-06 15:04:28 -0400490
Nicolas Capens5ce0ea62015-07-02 16:55:29 -0400491 if(symmetricNormalizedDepth && !state.fixedFunction)
John Bauman19bac1e2014-05-06 15:23:49 -0400492 {
493 r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f);
John Bauman89401822014-05-06 15:04:28 -0400494 }
495 }
496
497 void VertexRoutine::writeCache(Pointer<Byte> &cacheLine, Registers &r)
498 {
John Bauman19bac1e2014-05-06 15:23:49 -0400499 Vector4f v;
John Bauman89401822014-05-06 15:04:28 -0400500
501 for(int i = 0; i < 12; i++)
502 {
503 if(state.output[i].write)
504 {
John Bauman19bac1e2014-05-06 15:23:49 -0400505 v.x = r.o[i].x;
506 v.y = r.o[i].y;
507 v.z = r.o[i].z;
508 v.w = r.o[i].w;
John Bauman89401822014-05-06 15:04:28 -0400509
510 if(state.output[i].xClamp)
511 {
John Bauman19bac1e2014-05-06 15:23:49 -0400512 v.x = Max(v.x, Float4(0.0f));
513 v.x = Min(v.x, Float4(1.0f));
John Bauman89401822014-05-06 15:04:28 -0400514 }
515
516 if(state.output[i].yClamp)
517 {
John Bauman19bac1e2014-05-06 15:23:49 -0400518 v.y = Max(v.y, Float4(0.0f));
519 v.y = Min(v.y, Float4(1.0f));
John Bauman89401822014-05-06 15:04:28 -0400520 }
521
522 if(state.output[i].zClamp)
523 {
John Bauman19bac1e2014-05-06 15:23:49 -0400524 v.z = Max(v.z, Float4(0.0f));
525 v.z = Min(v.z, Float4(1.0f));
John Bauman89401822014-05-06 15:04:28 -0400526 }
527
528 if(state.output[i].wClamp)
529 {
John Bauman19bac1e2014-05-06 15:23:49 -0400530 v.w = Max(v.w, Float4(0.0f));
531 v.w = Min(v.w, Float4(1.0f));
John Bauman89401822014-05-06 15:04:28 -0400532 }
533
534 if(state.output[i].write == 0x01)
535 {
536 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x;
537 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y;
538 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z;
539 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w;
540 }
541 else
542 {
543 if(state.output[i].write == 0x02)
544 {
545 transpose2x4(v.x, v.y, v.z, v.w);
546 }
547 else
548 {
549 transpose4x4(v.x, v.y, v.z, v.w);
550 }
551
552 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x;
553 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y;
554 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z;
555 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w;
556 }
557 }
558 }
559
560 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (r.clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags
561 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (r.clipFlags >> 8) & 0x0000000FF;
562 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (r.clipFlags >> 16) & 0x0000000FF;
563 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (r.clipFlags >> 24) & 0x0000000FF;
564
565 int pos = state.positionRegister;
566
John Bauman19bac1e2014-05-06 15:23:49 -0400567 v.x = r.o[pos].x;
568 v.y = r.o[pos].y;
569 v.z = r.o[pos].z;
570 v.w = r.o[pos].w;
John Bauman89401822014-05-06 15:04:28 -0400571
John Bauman19bac1e2014-05-06 15:23:49 -0400572 Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
John Bauman89401822014-05-06 15:04:28 -0400573 Float4 rhw = Float4(1.0f) / w;
574
John Bauman19bac1e2014-05-06 15:23:49 -0400575 v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16))));
576 v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16))));
John Bauman89401822014-05-06 15:04:28 -0400577 v.z = v.z * rhw;
578 v.w = rhw;
579
580 transpose4x4(v.x, v.y, v.z, v.w);
581
582 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x;
583 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y;
584 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z;
585 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w;
586 }
587
588 void VertexRoutine::writeVertex(Pointer<Byte> &vertex, Pointer<Byte> &cache)
589 {
590 for(int i = 0; i < 12; i++)
591 {
592 if(state.output[i].write)
593 {
594 *Pointer<Float4>(vertex + OFFSET(Vertex,v[i])) = *Pointer<Float4>(cache + OFFSET(Vertex,v[i]));
595 }
596 }
597
598 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
599 *Pointer<Float4>(vertex + OFFSET(Vertex,X)) = *Pointer<Float4>(cache + OFFSET(Vertex,X));
600 }
601}