// SwiftShader Software Renderer
//
// Copyright(c) 2005-2012 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//

| 12 | #include "VertexRoutine.hpp" |
| 13 | |
| 14 | #include "VertexShader.hpp" |
| 15 | #include "Vertex.hpp" |
| 16 | #include "Half.hpp" |
| 17 | #include "Renderer.hpp" |
| 18 | #include "Constants.hpp" |
| 19 | #include "Debug.hpp" |
| 20 | |
| 21 | namespace sw |
| 22 | { |
// Global rendering conventions, defined in another translation unit.
// Both are read by postTransform() when the routine is generated.
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
| 25 | |
| 26 | VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : state(state), shader(shader) |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 27 | { |
| 28 | routine = 0; |
| 29 | } |
| 30 | |
| 31 | VertexRoutine::~VertexRoutine() |
| 32 | { |
| 33 | } |
| 34 | |
| 35 | void VertexRoutine::generate() |
| 36 | { |
John Bauman | 66b8ab2 | 2014-05-06 15:57:45 -0400 | [diff] [blame] | 37 | Function<Void, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte> > function; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 38 | { |
| 39 | Pointer<Byte> vertex(function.arg(0)); |
| 40 | Pointer<Byte> batch(function.arg(1)); |
| 41 | Pointer<Byte> task(function.arg(2)); |
| 42 | Pointer<Byte> data(function.arg(3)); |
| 43 | |
| 44 | const bool texldl = state.shaderContainsTexldl; |
| 45 | |
| 46 | Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); |
| 47 | Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); |
| 48 | Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag); |
| 49 | |
Nicolas Capens | c50d35d | 2015-01-27 01:52:41 -0500 | [diff] [blame] | 50 | UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 51 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 52 | Registers r(shader); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 53 | r.data = data; |
John Bauman | 66b8ab2 | 2014-05-06 15:57:45 -0400 | [diff] [blame] | 54 | r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants)); |
Nicolas Capens | cce8463 | 2015-06-10 16:09:20 -0400 | [diff] [blame] | 55 | if(shader && shader->instanceIdDeclared) |
Alexis Hetu | dd8df68 | 2015-06-05 17:08:39 -0400 | [diff] [blame] | 56 | { |
| 57 | r.instanceID = *Pointer<Int>(data + OFFSET(DrawData, instanceID)); |
| 58 | } |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 59 | |
| 60 | Do |
| 61 | { |
| 62 | UInt index = *Pointer<UInt>(batch); |
John Bauman | 66b8ab2 | 2014-05-06 15:57:45 -0400 | [diff] [blame] | 63 | UInt tagIndex = index & 0x0000003C; |
| 64 | UInt indexQ = !texldl ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance. |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 65 | |
| 66 | If(*Pointer<UInt>(tagCache + tagIndex) != indexQ) |
| 67 | { |
| 68 | *Pointer<UInt>(tagCache + tagIndex) = indexQ; |
| 69 | |
| 70 | readInput(r, indexQ); |
| 71 | pipeline(r); |
| 72 | postTransform(r); |
| 73 | computeClipFlags(r); |
| 74 | |
| 75 | Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex)); |
| 76 | writeCache(cacheLine0, r); |
| 77 | } |
| 78 | |
John Bauman | 66b8ab2 | 2014-05-06 15:57:45 -0400 | [diff] [blame] | 79 | UInt cacheIndex = index & 0x0000003F; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 80 | Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); |
| 81 | writeVertex(vertex, cacheLine); |
| 82 | |
| 83 | vertex += sizeof(Vertex); |
| 84 | batch += sizeof(unsigned int); |
Nicolas Capens | c50d35d | 2015-01-27 01:52:41 -0500 | [diff] [blame] | 85 | vertexCount--; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 86 | } |
Nicolas Capens | c50d35d | 2015-01-27 01:52:41 -0500 | [diff] [blame] | 87 | Until(vertexCount == 0) |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 88 | |
| 89 | Return(); |
| 90 | } |
| 91 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 92 | routine = function(L"VertexRoutine_%0.8X", state.shaderID); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 93 | } |
| 94 | |
| 95 | Routine *VertexRoutine::getRoutine() |
| 96 | { |
| 97 | return routine; |
| 98 | } |
| 99 | |
| 100 | void VertexRoutine::readInput(Registers &r, UInt &index) |
| 101 | { |
Nicolas Capens | 0f25090 | 2015-06-25 15:25:29 -0400 | [diff] [blame] | 102 | for(int i = 0; i < VERTEX_ATTRIBUTES; i++) |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 103 | { |
John Bauman | 66b8ab2 | 2014-05-06 15:57:45 -0400 | [diff] [blame] | 104 | Pointer<Byte> input = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,input) + sizeof(void*) * i); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 105 | UInt stride = *Pointer<UInt>(r.data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); |
| 106 | |
| 107 | r.v[i] = readStream(r, input, stride, state.input[i], index); |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | void VertexRoutine::computeClipFlags(Registers &r) |
| 112 | { |
| 113 | int pos = state.positionRegister; |
| 114 | |
| 115 | // Backtransform |
| 116 | if(state.preTransformed) |
| 117 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 118 | Float4 rhw = Float4(1.0f) / r.o[pos].w; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 119 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 120 | Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); |
| 121 | Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f); |
| 122 | Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); |
| 123 | Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 124 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 125 | r.o[pos].x = (r.o[pos].x - L) / W * rhw; |
| 126 | r.o[pos].y = (r.o[pos].y - T) / H * rhw; |
| 127 | r.o[pos].z = r.o[pos].z * rhw; |
| 128 | r.o[pos].w = rhw; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 129 | } |
| 130 | |
| 131 | if(state.superSampling) |
| 132 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 133 | r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w; |
| 134 | r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 135 | } |
| 136 | |
Nicolas Capens | 5491cb4 | 2015-07-02 15:33:29 -0400 | [diff] [blame] | 137 | Int4 maxX = CmpLT(r.o[pos].w, r.o[pos].x); |
| 138 | Int4 maxY = CmpLT(r.o[pos].w, r.o[pos].y); |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 139 | Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 140 | |
Nicolas Capens | 5491cb4 | 2015-07-02 15:33:29 -0400 | [diff] [blame] | 141 | Int4 minX = CmpNLE(-r.o[pos].w, r.o[pos].x); |
| 142 | Int4 minY = CmpNLE(-r.o[pos].w, r.o[pos].y); |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 143 | Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 144 | |
| 145 | Int flags; |
| 146 | |
| 147 | flags = SignMask(maxX); |
| 148 | r.clipFlags = *Pointer<Int>(r.constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing |
| 149 | flags = SignMask(maxY); |
| 150 | r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxY) + flags * 4); |
| 151 | flags = SignMask(maxZ); |
| 152 | r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxZ) + flags * 4); |
| 153 | flags = SignMask(minX); |
| 154 | r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minX) + flags * 4); |
| 155 | flags = SignMask(minY); |
| 156 | r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minY) + flags * 4); |
| 157 | flags = SignMask(minZ); |
| 158 | r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minZ) + flags * 4); |
| 159 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 160 | Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); |
| 161 | Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); |
| 162 | Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 163 | |
| 164 | flags = SignMask(finiteX & finiteY & finiteZ); |
| 165 | r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,fini) + flags * 4); |
| 166 | |
| 167 | if(state.preTransformed) |
| 168 | { |
| 169 | r.clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane |
| 170 | } |
| 171 | } |
| 172 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 173 | Vector4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 174 | { |
| 175 | const bool texldl = state.shaderContainsTexldl; |
| 176 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 177 | Vector4f v; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 178 | |
| 179 | Pointer<Byte> source0 = buffer + index * stride; |
| 180 | Pointer<Byte> source1 = source0 + (!texldl ? stride : 0); |
| 181 | Pointer<Byte> source2 = source1 + (!texldl ? stride : 0); |
| 182 | Pointer<Byte> source3 = source2 + (!texldl ? stride : 0); |
| 183 | |
| 184 | switch(stream.type) |
| 185 | { |
| 186 | case STREAMTYPE_FLOAT: |
| 187 | { |
| 188 | if(stream.count == 0) |
| 189 | { |
| 190 | // Null stream, all default components |
| 191 | } |
| 192 | else if(stream.count == 1) |
| 193 | { |
| 194 | v.x.x = *Pointer<Float>(source0); |
| 195 | v.x.y = *Pointer<Float>(source1); |
| 196 | v.x.z = *Pointer<Float>(source2); |
| 197 | v.x.w = *Pointer<Float>(source3); |
| 198 | } |
| 199 | else |
| 200 | { |
| 201 | v.x = *Pointer<Float4>(source0); |
| 202 | v.y = *Pointer<Float4>(source1); |
| 203 | v.z = *Pointer<Float4>(source2); |
| 204 | v.w = *Pointer<Float4>(source3); |
| 205 | |
| 206 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
| 207 | } |
| 208 | } |
| 209 | break; |
| 210 | case STREAMTYPE_BYTE: |
| 211 | { |
| 212 | v.x = Float4(*Pointer<Byte4>(source0)); |
| 213 | v.y = Float4(*Pointer<Byte4>(source1)); |
| 214 | v.z = Float4(*Pointer<Byte4>(source2)); |
| 215 | v.w = Float4(*Pointer<Byte4>(source3)); |
| 216 | |
| 217 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
| 218 | |
| 219 | if(stream.normalized) |
| 220 | { |
| 221 | if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 222 | if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 223 | if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 224 | if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 225 | } |
| 226 | } |
| 227 | break; |
| 228 | case STREAMTYPE_SBYTE: |
| 229 | { |
| 230 | v.x = Float4(*Pointer<SByte4>(source0)); |
| 231 | v.y = Float4(*Pointer<SByte4>(source1)); |
| 232 | v.z = Float4(*Pointer<SByte4>(source2)); |
| 233 | v.w = Float4(*Pointer<SByte4>(source3)); |
| 234 | |
| 235 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
| 236 | |
| 237 | if(stream.normalized) |
| 238 | { |
| 239 | if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte)); |
| 240 | if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte)); |
| 241 | if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte)); |
| 242 | if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte)); |
| 243 | } |
| 244 | } |
| 245 | break; |
| 246 | case STREAMTYPE_COLOR: |
| 247 | { |
| 248 | v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 249 | v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 250 | v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 251 | v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte)); |
| 252 | |
| 253 | transpose4x4(v.x, v.y, v.z, v.w); |
| 254 | |
| 255 | // Swap red and blue |
| 256 | Float4 t = v.x; |
| 257 | v.x = v.z; |
| 258 | v.z = t; |
| 259 | } |
| 260 | break; |
| 261 | case STREAMTYPE_SHORT: |
| 262 | { |
| 263 | v.x = Float4(*Pointer<Short4>(source0)); |
| 264 | v.y = Float4(*Pointer<Short4>(source1)); |
| 265 | v.z = Float4(*Pointer<Short4>(source2)); |
| 266 | v.w = Float4(*Pointer<Short4>(source3)); |
| 267 | |
| 268 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
| 269 | |
| 270 | if(stream.normalized) |
| 271 | { |
| 272 | if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort)); |
| 273 | if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort)); |
| 274 | if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort)); |
| 275 | if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort)); |
| 276 | } |
| 277 | } |
| 278 | break; |
| 279 | case STREAMTYPE_USHORT: |
| 280 | { |
| 281 | v.x = Float4(*Pointer<UShort4>(source0)); |
| 282 | v.y = Float4(*Pointer<UShort4>(source1)); |
| 283 | v.z = Float4(*Pointer<UShort4>(source2)); |
| 284 | v.w = Float4(*Pointer<UShort4>(source3)); |
| 285 | |
| 286 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
| 287 | |
| 288 | if(stream.normalized) |
| 289 | { |
| 290 | if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort)); |
| 291 | if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort)); |
| 292 | if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort)); |
| 293 | if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort)); |
| 294 | } |
| 295 | } |
| 296 | break; |
| 297 | case STREAMTYPE_UDEC3: |
| 298 | { |
| 299 | // FIXME: Vectorize |
| 300 | { |
| 301 | Int x, y, z; |
| 302 | |
| 303 | x = y = z = *Pointer<Int>(source0); |
| 304 | |
| 305 | v.x.x = Float(x & 0x000003FF); |
| 306 | v.x.y = Float(y & 0x000FFC00); |
| 307 | v.x.z = Float(z & 0x3FF00000); |
| 308 | } |
| 309 | |
| 310 | { |
| 311 | Int x, y, z; |
| 312 | |
| 313 | x = y = z = *Pointer<Int>(source1); |
| 314 | |
| 315 | v.y.x = Float(x & 0x000003FF); |
| 316 | v.y.y = Float(y & 0x000FFC00); |
| 317 | v.y.z = Float(z & 0x3FF00000); |
| 318 | } |
| 319 | |
| 320 | { |
| 321 | Int x, y, z; |
| 322 | |
| 323 | x = y = z = *Pointer<Int>(source2); |
| 324 | |
| 325 | v.z.x = Float(x & 0x000003FF); |
| 326 | v.z.y = Float(y & 0x000FFC00); |
| 327 | v.z.z = Float(z & 0x3FF00000); |
| 328 | } |
| 329 | |
| 330 | { |
| 331 | Int x, y, z; |
| 332 | |
| 333 | x = y = z = *Pointer<Int>(source3); |
| 334 | |
| 335 | v.w.x = Float(x & 0x000003FF); |
| 336 | v.w.y = Float(y & 0x000FFC00); |
| 337 | v.w.z = Float(z & 0x3FF00000); |
| 338 | } |
| 339 | |
| 340 | transpose4x3(v.x, v.y, v.z, v.w); |
| 341 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 342 | v.y *= Float4(1.0f / 0x00000400); |
| 343 | v.z *= Float4(1.0f / 0x00100000); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 344 | } |
| 345 | break; |
| 346 | case STREAMTYPE_DEC3N: |
| 347 | { |
| 348 | // FIXME: Vectorize |
| 349 | { |
| 350 | Int x, y, z; |
| 351 | |
| 352 | x = y = z = *Pointer<Int>(source0); |
| 353 | |
| 354 | v.x.x = Float((x << 22) & 0xFFC00000); |
| 355 | v.x.y = Float((y << 12) & 0xFFC00000); |
| 356 | v.x.z = Float((z << 2) & 0xFFC00000); |
| 357 | } |
| 358 | |
| 359 | { |
| 360 | Int x, y, z; |
| 361 | |
| 362 | x = y = z = *Pointer<Int>(source1); |
| 363 | |
| 364 | v.y.x = Float((x << 22) & 0xFFC00000); |
| 365 | v.y.y = Float((y << 12) & 0xFFC00000); |
| 366 | v.y.z = Float((z << 2) & 0xFFC00000); |
| 367 | } |
| 368 | |
| 369 | { |
| 370 | Int x, y, z; |
| 371 | |
| 372 | x = y = z = *Pointer<Int>(source2); |
| 373 | |
| 374 | v.z.x = Float((x << 22) & 0xFFC00000); |
| 375 | v.z.y = Float((y << 12) & 0xFFC00000); |
| 376 | v.z.z = Float((z << 2) & 0xFFC00000); |
| 377 | } |
| 378 | |
| 379 | { |
| 380 | Int x, y, z; |
| 381 | |
| 382 | x = y = z = *Pointer<Int>(source3); |
| 383 | |
| 384 | v.w.x = Float((x << 22) & 0xFFC00000); |
| 385 | v.w.y = Float((y << 12) & 0xFFC00000); |
| 386 | v.w.z = Float((z << 2) & 0xFFC00000); |
| 387 | } |
| 388 | |
| 389 | transpose4x3(v.x, v.y, v.z, v.w); |
| 390 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 391 | v.x *= Float4(1.0f / 0x00400000 / 511.0f); |
| 392 | v.y *= Float4(1.0f / 0x00400000 / 511.0f); |
| 393 | v.z *= Float4(1.0f / 0x00400000 / 511.0f); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 394 | } |
| 395 | break; |
| 396 | case STREAMTYPE_FIXED: |
| 397 | { |
| 398 | v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed)); |
| 399 | v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed)); |
| 400 | v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed)); |
| 401 | v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed)); |
| 402 | |
| 403 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
| 404 | } |
| 405 | break; |
| 406 | case STREAMTYPE_HALF: |
| 407 | { |
| 408 | if(stream.count >= 1) |
| 409 | { |
| 410 | UShort x0 = *Pointer<UShort>(source0 + 0); |
| 411 | UShort x1 = *Pointer<UShort>(source1 + 0); |
| 412 | UShort x2 = *Pointer<UShort>(source2 + 0); |
| 413 | UShort x3 = *Pointer<UShort>(source3 + 0); |
| 414 | |
| 415 | v.x.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x0) * 4); |
| 416 | v.x.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x1) * 4); |
| 417 | v.x.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x2) * 4); |
| 418 | v.x.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x3) * 4); |
| 419 | } |
| 420 | |
| 421 | if(stream.count >= 2) |
| 422 | { |
| 423 | UShort y0 = *Pointer<UShort>(source0 + 2); |
| 424 | UShort y1 = *Pointer<UShort>(source1 + 2); |
| 425 | UShort y2 = *Pointer<UShort>(source2 + 2); |
| 426 | UShort y3 = *Pointer<UShort>(source3 + 2); |
| 427 | |
| 428 | v.y.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y0) * 4); |
| 429 | v.y.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y1) * 4); |
| 430 | v.y.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y2) * 4); |
| 431 | v.y.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y3) * 4); |
| 432 | } |
| 433 | |
| 434 | if(stream.count >= 3) |
| 435 | { |
| 436 | UShort z0 = *Pointer<UShort>(source0 + 4); |
| 437 | UShort z1 = *Pointer<UShort>(source1 + 4); |
| 438 | UShort z2 = *Pointer<UShort>(source2 + 4); |
| 439 | UShort z3 = *Pointer<UShort>(source3 + 4); |
| 440 | |
| 441 | v.z.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z0) * 4); |
| 442 | v.z.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z1) * 4); |
| 443 | v.z.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z2) * 4); |
| 444 | v.z.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z3) * 4); |
| 445 | } |
| 446 | |
| 447 | if(stream.count >= 4) |
| 448 | { |
| 449 | UShort w0 = *Pointer<UShort>(source0 + 6); |
| 450 | UShort w1 = *Pointer<UShort>(source1 + 6); |
| 451 | UShort w2 = *Pointer<UShort>(source2 + 6); |
| 452 | UShort w3 = *Pointer<UShort>(source3 + 6); |
| 453 | |
| 454 | v.w.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w0) * 4); |
| 455 | v.w.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w1) * 4); |
| 456 | v.w.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w2) * 4); |
| 457 | v.w.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w3) * 4); |
| 458 | } |
| 459 | } |
| 460 | break; |
| 461 | case STREAMTYPE_INDICES: |
| 462 | { |
| 463 | v.x.x = *Pointer<Float>(source0); |
| 464 | v.x.y = *Pointer<Float>(source1); |
| 465 | v.x.z = *Pointer<Float>(source2); |
| 466 | v.x.w = *Pointer<Float>(source3); |
| 467 | } |
| 468 | break; |
| 469 | default: |
| 470 | ASSERT(false); |
| 471 | } |
| 472 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 473 | if(stream.count < 1) v.x = Float4(0.0f); |
| 474 | if(stream.count < 2) v.y = Float4(0.0f); |
| 475 | if(stream.count < 3) v.z = Float4(0.0f); |
| 476 | if(stream.count < 4) v.w = Float4(1.0f); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 477 | |
| 478 | return v; |
| 479 | } |
| 480 | |
| 481 | void VertexRoutine::postTransform(Registers &r) |
| 482 | { |
| 483 | int pos = state.positionRegister; |
| 484 | |
Nicolas Capens | 5491cb4 | 2015-07-02 15:33:29 -0400 | [diff] [blame] | 485 | if(!halfIntegerCoordinates) |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 486 | { |
Nicolas Capens | 5491cb4 | 2015-07-02 15:33:29 -0400 | [diff] [blame] | 487 | r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w; |
| 488 | r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w; |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 489 | } |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 490 | |
Nicolas Capens | 5ce0ea6 | 2015-07-02 16:55:29 -0400 | [diff] [blame^] | 491 | if(symmetricNormalizedDepth && !state.fixedFunction) |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 492 | { |
| 493 | r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 494 | } |
| 495 | } |
| 496 | |
| 497 | void VertexRoutine::writeCache(Pointer<Byte> &cacheLine, Registers &r) |
| 498 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 499 | Vector4f v; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 500 | |
| 501 | for(int i = 0; i < 12; i++) |
| 502 | { |
| 503 | if(state.output[i].write) |
| 504 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 505 | v.x = r.o[i].x; |
| 506 | v.y = r.o[i].y; |
| 507 | v.z = r.o[i].z; |
| 508 | v.w = r.o[i].w; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 509 | |
| 510 | if(state.output[i].xClamp) |
| 511 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 512 | v.x = Max(v.x, Float4(0.0f)); |
| 513 | v.x = Min(v.x, Float4(1.0f)); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 514 | } |
| 515 | |
| 516 | if(state.output[i].yClamp) |
| 517 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 518 | v.y = Max(v.y, Float4(0.0f)); |
| 519 | v.y = Min(v.y, Float4(1.0f)); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 520 | } |
| 521 | |
| 522 | if(state.output[i].zClamp) |
| 523 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 524 | v.z = Max(v.z, Float4(0.0f)); |
| 525 | v.z = Min(v.z, Float4(1.0f)); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 526 | } |
| 527 | |
| 528 | if(state.output[i].wClamp) |
| 529 | { |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 530 | v.w = Max(v.w, Float4(0.0f)); |
| 531 | v.w = Min(v.w, Float4(1.0f)); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 532 | } |
| 533 | |
| 534 | if(state.output[i].write == 0x01) |
| 535 | { |
| 536 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x; |
| 537 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y; |
| 538 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z; |
| 539 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w; |
| 540 | } |
| 541 | else |
| 542 | { |
| 543 | if(state.output[i].write == 0x02) |
| 544 | { |
| 545 | transpose2x4(v.x, v.y, v.z, v.w); |
| 546 | } |
| 547 | else |
| 548 | { |
| 549 | transpose4x4(v.x, v.y, v.z, v.w); |
| 550 | } |
| 551 | |
| 552 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x; |
| 553 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y; |
| 554 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z; |
| 555 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w; |
| 556 | } |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (r.clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags |
| 561 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (r.clipFlags >> 8) & 0x0000000FF; |
| 562 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (r.clipFlags >> 16) & 0x0000000FF; |
| 563 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (r.clipFlags >> 24) & 0x0000000FF; |
| 564 | |
| 565 | int pos = state.positionRegister; |
| 566 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 567 | v.x = r.o[pos].x; |
| 568 | v.y = r.o[pos].y; |
| 569 | v.z = r.o[pos].z; |
| 570 | v.w = r.o[pos].w; |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 571 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 572 | Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 573 | Float4 rhw = Float4(1.0f) / w; |
| 574 | |
John Bauman | 19bac1e | 2014-05-06 15:23:49 -0400 | [diff] [blame] | 575 | v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)))); |
| 576 | v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)))); |
John Bauman | 8940182 | 2014-05-06 15:04:28 -0400 | [diff] [blame] | 577 | v.z = v.z * rhw; |
| 578 | v.w = rhw; |
| 579 | |
| 580 | transpose4x4(v.x, v.y, v.z, v.w); |
| 581 | |
| 582 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x; |
| 583 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y; |
| 584 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z; |
| 585 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w; |
| 586 | } |
| 587 | |
| 588 | void VertexRoutine::writeVertex(Pointer<Byte> &vertex, Pointer<Byte> &cache) |
| 589 | { |
| 590 | for(int i = 0; i < 12; i++) |
| 591 | { |
| 592 | if(state.output[i].write) |
| 593 | { |
| 594 | *Pointer<Float4>(vertex + OFFSET(Vertex,v[i])) = *Pointer<Float4>(cache + OFFSET(Vertex,v[i])); |
| 595 | } |
| 596 | } |
| 597 | |
| 598 | *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags)); |
| 599 | *Pointer<Float4>(vertex + OFFSET(Vertex,X)) = *Pointer<Float4>(cache + OFFSET(Vertex,X)); |
| 600 | } |
| 601 | } |