| // SwiftShader Software Renderer |
| // |
| // Copyright(c) 2005-2012 TransGaming Inc. |
| // |
| // All rights reserved. No part of this software may be copied, distributed, transmitted, |
| // transcribed, stored in a retrieval system, translated into any human or computer |
| // language by any means, or disclosed to third parties without the explicit written |
| // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express |
| // or implied, including but not limited to any patent rights, are granted to you. |
| // |
| |
| #include "QuadRasterizer.hpp" |
| |
| #include "Math.hpp" |
| #include "Primitive.hpp" |
| #include "Renderer.hpp" |
| #include "Constants.hpp" |
| #include "Debug.hpp" |
| |
| namespace sw |
| { |
// Globals defined in another translation unit.

// Number of clusters; generate() uses it to pick each cluster's first row
// pair and rasterize() uses it as the row stride between iterations.
extern int clusterCount;

// Enables the pre-pass in rasterize() that advances the start of a span past
// leading quads whose depth comparison sets no lane.
extern bool veryEarlyDepthTest;

// Selects the comparison used by that pre-pass: CmpLE when set, CmpNLT otherwise.
extern bool complementaryDepthBuffer;
| |
| QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : PixelRoutine(state, pixelShader) |
| { |
| } |
| |
| QuadRasterizer::~QuadRasterizer() |
| { |
| } |
| |
| void QuadRasterizer::generate() |
| { |
| Function<Void, Pointer<Byte>, Int, Int, Pointer<Byte>> function; |
| { |
| #if PERF_PROFILE |
| Long pixelTime = Ticks(); |
| #endif |
| |
| Pointer<Byte> primitive(function.arg(0)); |
| Int count(function.arg(1)); |
| Int cluster(function.arg(2)); |
| Pointer<Byte> data(function.arg(3)); |
| |
| Registers r(shader); |
| r.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); |
| r.cluster = cluster; |
| r.data = data; |
| |
| Do |
| { |
| r.primitive = primitive; |
| |
| Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin)); |
| Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax)); |
| |
| Int cluster2 = r.cluster + r.cluster; |
| yMin += clusterCount * 2 - 2 - cluster2; |
| yMin &= -clusterCount * 2; |
| yMin += cluster2; |
| |
| If(yMin < yMax) |
| { |
| rasterize(r, yMin, yMax); |
| } |
| |
| primitive += sizeof(Primitive) * state.multiSample; |
| count--; |
| } |
| Until(count == 0) |
| |
| if(state.occlusionEnabled) |
| { |
| UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster); |
| clusterOcclusion += r.occlusion; |
| *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion; |
| } |
| |
| #if PERF_PROFILE |
| r.cycles[PERF_PIXEL] = Ticks() - pixelTime; |
| |
| for(int i = 0; i < PERF_TIMERS; i++) |
| { |
| *Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += r.cycles[i]; |
| } |
| #endif |
| |
| Return(); |
| } |
| |
| routine = function(L"PixelRoutine_%0.8X", state.shaderID); |
| } |
| |
| void QuadRasterizer::rasterize(Registers &r, Int &yMin, Int &yMax) |
| { |
| Pointer<Byte> cBuffer[4]; |
| Pointer<Byte> zBuffer; |
| Pointer<Byte> sBuffer; |
| |
| for(int index = 0; index < 4; index++) |
| { |
| if(state.colorWriteActive(index)) |
| { |
| cBuffer[index] = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); |
| } |
| } |
| |
| if(state.depthTestActive) |
| { |
| zBuffer = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)); |
| } |
| |
| if(state.stencilActive) |
| { |
| sBuffer = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB)); |
| } |
| |
| Int y = yMin; |
| |
| Do |
| { |
| Int x0; |
| Int x1; |
| Int x2; |
| |
| x0 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); |
| x2 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); |
| x0 = Min(x0, x2); |
| |
| for(unsigned int q = 1; q < state.multiSample; q++) |
| { |
| Int x0q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); |
| Int x2q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); |
| x0q = Min(x0q, x2q); |
| |
| x0 = Min(x0q, x0); |
| } |
| |
| x0 &= 0xFFFFFFFE; |
| |
| x1 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); |
| x2 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); |
| x1 = Max(x1, x2); |
| |
| for(unsigned int q = 1; q < state.multiSample; q++) |
| { |
| Int x1q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); |
| Int x2q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); |
| x1q = Max(x1q, x2q); |
| |
| x1 = Max(x1q, x1); |
| } |
| |
| Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16); |
| |
| if(interpolateZ()) |
| { |
| for(unsigned int q = 0; q < state.multiSample; q++) |
| { |
| Float4 y = yyyy; |
| |
| if(state.multiSample > 1) |
| { |
| y -= *Pointer<Float4>(r.constants + OFFSET(Constants,Y) + q * sizeof(float4)); |
| } |
| |
| r.Dz[q] = *Pointer<Float4>(r.primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(r.primitive + OFFSET(Primitive,z.B), 16); |
| } |
| } |
| |
| if(veryEarlyDepthTest && state.multiSample == 1) |
| { |
| if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == Context::DEPTH_LESSEQUAL || state.depthCompareMode == Context::DEPTH_LESS)) // FIXME: Both modes ok? |
| { |
| Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16); |
| |
| Pointer<Byte> buffer; |
| Int pitch; |
| |
| if(!state.quadLayoutDepthBuffer) |
| { |
| buffer = zBuffer + 4 * x0; |
| pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)); |
| } |
| else |
| { |
| buffer = zBuffer + 8 * x0; |
| } |
| |
| For(Int x = x0, x < x1, x += 2) |
| { |
| Float4 z = interpolate(xxxx, r.Dz[0], z, r.primitive + OFFSET(Primitive,z), false, false); |
| |
| Float4 zValue; |
| |
| if(!state.quadLayoutDepthBuffer) |
| { |
| // FIXME: Properly optimizes? |
| zValue.xy = *Pointer<Float4>(buffer); |
| zValue.zw = *Pointer<Float4>(buffer + pitch - 8); |
| } |
| else |
| { |
| zValue = *Pointer<Float4>(buffer, 16); |
| } |
| |
| Int4 zTest; |
| |
| if(complementaryDepthBuffer) |
| { |
| zTest = CmpLE(zValue, z); |
| } |
| else |
| { |
| zTest = CmpNLT(zValue, z); |
| } |
| |
| Int zMask = SignMask(zTest); |
| |
| If(zMask == 0) |
| { |
| x0 += 2; |
| } |
| Else |
| { |
| x = x1; |
| } |
| |
| xxxx += Float4(2); |
| |
| if(!state.quadLayoutDepthBuffer) |
| { |
| buffer += 8; |
| } |
| else |
| { |
| buffer += 16; |
| } |
| } |
| } |
| } |
| |
| If(x0 < x1) |
| { |
| if(interpolateW()) |
| { |
| r.Dw = *Pointer<Float4>(r.primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(r.primitive + OFFSET(Primitive,w.B), 16); |
| } |
| |
| for(int interpolant = 0; interpolant < 11; interpolant++) |
| { |
| int componentCount = interpolant < 10 ? 4 : 1; // Fog only has one component |
| |
| for(int component = 0; component < componentCount; component++) |
| { |
| if(state.interpolant[interpolant].component & (1 << component)) |
| { |
| r.Dv[interpolant][component] = *Pointer<Float4>(r.primitive + OFFSET(Primitive,V[interpolant][component].C), 16); |
| |
| if(!(state.interpolant[interpolant].flat & (1 << component))) |
| { |
| r.Dv[interpolant][component] += yyyy * *Pointer<Float4>(r.primitive + OFFSET(Primitive,V[interpolant][component].B), 16); |
| } |
| } |
| } |
| } |
| |
| Short4 xLeft[4]; |
| Short4 xRight[4]; |
| |
| for(unsigned int q = 0; q < state.multiSample; q++) |
| { |
| xLeft[q] = *Pointer<Short4>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span)); |
| xRight[q] = xLeft[q]; |
| |
| xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2); |
| xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1); |
| } |
| |
| For(Int x = x0, x < x1, x += 2) |
| { |
| Short4 xxxx = Short4(x); |
| Int cMask[4]; |
| |
| for(unsigned int q = 0; q < state.multiSample; q++) |
| { |
| Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx); |
| cMask[q] = SignMask(Pack(mask, mask)) & 0x0000000F; |
| } |
| |
| quad(r, cBuffer, zBuffer, sBuffer, cMask, x, y); |
| } |
| } |
| |
| for(int index = 0; index < 4; index++) |
| { |
| if(state.colorWriteActive(index)) |
| { |
| cBuffer[index] += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount)); // FIXME: Precompute |
| } |
| } |
| |
| if(state.depthTestActive) |
| { |
| zBuffer += *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute |
| } |
| |
| if(state.stencilActive) |
| { |
| sBuffer += *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute |
| } |
| |
| y += 2 * clusterCount; |
| } |
| Until(y >= yMax) |
| } |
| } |