// blob: a11cf185197156653a4ca8db05bb4343684df620 [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2012 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
#include "QuadRasterizer.hpp"
#include "Math.hpp"
#include "Primitive.hpp"
#include "Renderer.hpp"
#include "Constants.hpp"
#include "Debug.hpp"
namespace sw
{
// Settings defined outside this file.
// veryEarlyDepthTest: when set (and multiSample == 1), rasterize() pre-scans the depth buffer to trim leading quads.
extern bool veryEarlyDepthTest;
// complementaryDepthBuffer: selects which comparison (CmpLE vs. CmpNLT) the pre-scan in rasterize() uses.
extern bool complementaryDepthBuffer;
// clusterCount: rows are processed in steps of 2 * clusterCount; this file masks with it and takes its log2,
// so it is presumably always a power of two - TODO confirm where it is assigned.
extern int clusterCount;
// Passes the pixel processor state and the pixel shader straight through to the PixelRoutine base;
// nothing else is initialized here.
QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader)
	: PixelRoutine(state, pixelShader)
{
}
// Intentionally empty: this class adds no cleanup of its own.
QuadRasterizer::~QuadRasterizer() {}
// Builds the pixel routine and stores it in `routine`.
//
// The routine takes four arguments: a pointer to the first primitive, the number of primitives,
// the cluster index, and a pointer to the draw data. For every primitive it rounds yMin up to the
// first row assigned to this cluster and, if that row is still below yMax, rasterizes it.
// When occlusion counting is enabled, the accumulated count is added to this cluster's slot in
// DrawData::occlusion after all primitives have been processed.
void QuadRasterizer::generate()
{
	Function<Void, Pointer<Byte>, Int, Int, Pointer<Byte> > function;
	{
		#if PERF_PROFILE
			Long startTicks = Ticks();
		#endif

		// Bind the routine's arguments to named values.
		Pointer<Byte> primitive(function.arg(0));
		Int count(function.arg(1));
		Int cluster(function.arg(2));
		Pointer<Byte> data(function.arg(3));

		Registers r(shader);
		r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
		r.cluster = cluster;
		r.data = data;

		// The body runs before the count is tested, so at least one primitive is always processed.
		Do
		{
			r.primitive = primitive;

			Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin));
			Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax));

			// Round yMin up to the next row of the form 2 * cluster + k * (2 * clusterCount).
			// The mask relies on clusterCount being a power of two.
			Int clusterRowOffset = r.cluster + r.cluster;
			yMin += clusterCount * 2 - 2 - clusterRowOffset;
			yMin &= -clusterCount * 2;
			yMin += clusterRowOffset;

			If(yMin < yMax)
			{
				rasterize(r, yMin, yMax);
			}

			// Each primitive occupies state.multiSample consecutive Primitive records.
			primitive += sizeof(Primitive) * state.multiSample;
			count--;
		}
		Until(count == 0)

		if(state.occlusionEnabled)
		{
			// Read-modify-write of this cluster's 4-byte occlusion slot.
			UInt occlusionTotal = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster);
			occlusionTotal += r.occlusion;
			*Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = occlusionTotal;
		}

		#if PERF_PROFILE
			r.cycles[PERF_PIXEL] = Ticks() - startTicks;

			// Add every timer to this cluster's 8-byte slot in DrawData::cycles.
			for(int timer = 0; timer < PERF_TIMERS; timer++)
			{
				*Pointer<Long>(data + OFFSET(DrawData,cycles[timer]) + 8 * cluster) += r.cycles[timer];
			}
		#endif

		Return();
	}

	routine = function(L"PixelRoutine_%0.8X", state.shaderID);
}
// Builds the row loop for the primitive in r.primitive (called from generate()).
//
// Starting at row yMin and advancing by 2 * clusterCount rows per iteration, each iteration
// handles the row pair (y, y + 1): it derives the horizontal range [x0, x1) from the primitive's
// outline spans, computes the row-dependent part of the interpolants, and calls quad() once per
// two-pixel step in x.
//
// r    - register state; reads r.data, r.primitive and r.constants, writes r.Dz, r.Dw and r.Dv.
// yMin - first row to process; also used to position the buffer row pointers.
// yMax - the loop stops once y >= yMax (tested after each iteration, so the body runs at least once).
//
// NOTE(review): the capitalized If/Else/For/Do/Until appear to be the code-generation DSL's control
// flow (they operate on Int/Float4 values), while the lowercase if/for branch on `state` in plain
// C++ - confirm against the DSL headers.
void QuadRasterizer::rasterize(Registers &r, Int &yMin, Int &yMax)
{
// Row pointers for up to four color targets plus depth and stencil. Each one is only assigned
// (and only advanced at the bottom of the loop) when the matching state flag is active.
Pointer<Byte> cBuffer[4];
Pointer<Byte> zBuffer;
Pointer<Byte> sBuffer;
for(int index = 0; index < 4; index++)
{
if(state.colorWriteActive(index))
{
// Base pointer of the target plus yMin times its pitch.
cBuffer[index] = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
}
}
if(state.depthTestActive)
{
zBuffer = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
}
if(state.stencilActive)
{
sBuffer = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB));
}
Int y = yMin;
Do
{
Int x0;
Int x1;
Int x2;
// Left bound: the smaller of the outline's left values for rows y and y + 1.
x0 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
x2 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
x0 = Min(x0, x2);
// With multisampling, sample q's outline lives in the q-th Primitive record after r.primitive;
// take the minimum over all samples.
for(unsigned int q = 1; q < state.multiSample; q++)
{
Int x0q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
Int x2q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
x0q = Min(x0q, x2q);
x0 = Min(x0q, x0);
}
// Clear bit 0 so that x0 is even; the x loops below step by 2.
x0 &= 0xFFFFFFFE;
// Right bound: the larger of the outline's right values for rows y and y + 1, over all samples.
x1 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
x2 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
x1 = Max(x1, x2);
for(unsigned int q = 1; q < state.multiSample; q++)
{
Int x1q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
Int x2q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
x1q = Max(x1q, x2q);
x1 = Max(x1q, x1);
}
// Row coordinate as a vector: y in every lane plus the primitive's yQuad offsets.
Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16);
if(interpolateZ())
{
// Row-dependent part of the depth plane for each sample: z.C + y * z.B.
for(unsigned int q = 0; q < state.multiSample; q++)
{
// Note: this Float4 `y` shadows the Int row counter `y` of the enclosing loop.
Float4 y = yyyy;
if(state.multiSample > 1)
{
// Subtract the q-th entry of Constants::Y (presumably the sample's vertical offset - TODO confirm).
y -= *Pointer<Float4>(r.constants + OFFSET(Constants,Y) + q * sizeof(float4));
}
r.Dz[q] = *Pointer<Float4>(r.primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(r.primitive + OFFSET(Primitive,z.B), 16);
}
}
// Optional pre-scan that trims the left end of the range: x0 is advanced past the leading run of
// quads for which the depth comparison below sets no lane. Only done for single-sample rendering
// without stencil and with a LESS / LESSEQUAL depth compare.
if(veryEarlyDepthTest && state.multiSample == 1)
{
if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == Context::DEPTH_LESSEQUAL || state.depthCompareMode == Context::DEPTH_LESS)) // FIXME: Both modes ok?
{
Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16);
Pointer<Byte> buffer;
// `pitch` is only assigned, and only read, on the non-quad-layout path.
Int pitch;
if(!state.quadLayoutDepthBuffer)
{
buffer = zBuffer + 4 * x0;
pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
}
else
{
buffer = zBuffer + 8 * x0;
}
For(Int x = x0, x < x1, x += 2)
{
// NOTE(review): `z` is passed as an argument inside its own initializer; presumably interpolate()
// ignores that argument when the trailing flags are false - confirm.
Float4 z = interpolate(xxxx, r.Dz[0], z, r.primitive + OFFSET(Primitive,z), false, false);
Float4 zValue;
if(!state.quadLayoutDepthBuffer)
{
// Two loads, one per row: the first fills .xy from `buffer`, the second fills .zw from
// `buffer + pitch - 8` (presumably so that the upper two lanes come from the next row).
// FIXME: Properly optimizes?
zValue.xy = *Pointer<Float4>(buffer);
zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
}
else
{
// Quad layout: all four depth values are read with a single 16-byte-aligned load.
zValue = *Pointer<Float4>(buffer, 16);
}
Int4 zTest;
if(complementaryDepthBuffer)
{
zTest = CmpLE(zValue, z);
}
else
{
zTest = CmpNLT(zValue, z);
}
Int zMask = SignMask(zTest);
If(zMask == 0)
{
// No lane of the comparison was set: drop this quad from the range.
x0 += 2;
}
Else
{
// At least one lane was set: force the loop condition false to stop scanning.
x = x1;
}
xxxx += Float4(2);
// Advance to the next quad: 8 bytes per step in the linear layout, 16 in the quad layout.
if(!state.quadLayoutDepthBuffer)
{
buffer += 8;
}
else
{
buffer += 16;
}
}
}
}
// Skip all per-row setup and shading when nothing remains in the range.
If(x0 < x1)
{
if(interpolateW())
{
r.Dw = *Pointer<Float4>(r.primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(r.primitive + OFFSET(Primitive,w.B), 16);
}
// Row-dependent part of every enabled interpolant component: C, plus yyyy * B unless the
// component is flagged flat.
for(int interpolant = 0; interpolant < 11; interpolant++)
{
int componentCount = interpolant < 10 ? 4 : 1; // Fog only has one component
for(int component = 0; component < componentCount; component++)
{
if(state.interpolant[interpolant].component & (1 << component))
{
r.Dv[interpolant][component] = *Pointer<Float4>(r.primitive + OFFSET(Primitive,V[interpolant][component].C), 16);
if(!(state.interpolant[interpolant].flat & (1 << component)))
{
r.Dv[interpolant][component] += yyyy * *Pointer<Float4>(r.primitive + OFFSET(Primitive,V[interpolant][component].B), 16);
}
}
}
}
// Per-sample edge vectors used for the coverage test below. The arrays hold four entries, so
// this assumes state.multiSample <= 4 - TODO confirm.
Short4 xLeft[4];
Short4 xRight[4];
for(unsigned int q = 0; q < state.multiSample; q++)
{
// One Short4 load starting at the span for row y; presumably this covers the left/right
// values of rows y and y + 1. The swizzles then presumably replicate the left values
// (0xA0) and the right values (0xF5) per row, and the subtracted constants account for the
// second pixel of each row being at x + 1 - confirm against Swizzle's selector encoding.
xLeft[q] = *Pointer<Short4>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span));
xRight[q] = xLeft[q];
xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2);
xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1);
}
For(Int x = x0, x < x1, x += 2)
{
Short4 xxxx = Short4(x);
Int cMask[4];
for(unsigned int q = 0; q < state.multiSample; q++)
{
// A lane is set when x lies strictly between the adjusted left and right edges; the result
// is reduced to a 4-bit mask per sample.
Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx);
cMask[q] = SignMask(Pack(mask, mask)) & 0x0000000F;
}
quad(r, cBuffer, zBuffer, sBuffer, cMask, x, y);
}
}
// Advance every active row pointer by pitch << (1 + log2(clusterCount)), i.e. by
// 2 * clusterCount rows when clusterCount is a power of two, matching the y step below.
for(int index = 0; index < 4; index++)
{
if(state.colorWriteActive(index))
{
cBuffer[index] += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount)); // FIXME: Precompute
}
}
if(state.depthTestActive)
{
zBuffer += *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute
}
if(state.stencilActive)
{
sBuffer += *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute
}
y += 2 * clusterCount;
}
Until(y >= yMax)
}
}