Update SwiftShader to April code dump.
April code dump from TransGaming. Adds a new shader compiler.
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index f28a741..1c2be07 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -1,6 +1,6 @@
// SwiftShader Software Renderer
//
-// Copyright(c) 2005-2011 TransGaming Inc.
+// Copyright(c) 2005-2012 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
@@ -20,7 +20,10 @@
namespace sw
{
- VertexRoutine::VertexRoutine(const VertexProcessor::State &state) : state(state)
+ extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
+ extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
+
+ VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : state(state), shader(shader)
{
routine = 0;
}
@@ -46,7 +49,7 @@
UInt count = *Pointer<UInt>(task+ OFFSET(VertexTask,count));
- Registers r;
+ Registers r(shader);
r.data = data;
r.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
@@ -82,7 +85,7 @@
Return();
}
- routine = function(L"VertexRoutine_%0.16llX", state.shaderHash);
+ routine = function(L"VertexRoutine_%0.8X", state.shaderID);
}
Routine *VertexRoutine::getRoutine()
@@ -108,41 +111,41 @@
// Backtransform
if(state.preTransformed)
{
- Float4 rhw = Float4(1.0f, 1.0f, 1.0f, 1.0f) / r.ow[pos];
+ Float4 rhw = Float4(1.0f) / r.o[pos].w;
- Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,WWWWx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f);
- Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,HHHHx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f);
- Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,LLLLx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f);
- Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,TTTTx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f);
+ Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f);
+ Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f);
+ Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f);
+ Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f);
- r.ox[pos] = (r.ox[pos] - L) / W * rhw;
- r.oy[pos] = (r.oy[pos] - T) / H * rhw;
- r.oz[pos] = r.oz[pos] * rhw;
- r.ow[pos] = rhw;
+ r.o[pos].x = (r.o[pos].x - L) / W * rhw;
+ r.o[pos].y = (r.o[pos].y - T) / H * rhw;
+ r.o[pos].z = r.o[pos].z * rhw;
+ r.o[pos].w = rhw;
}
if(state.superSampling)
{
- r.ox[pos] = r.ox[pos] + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.ow[pos];
- r.oy[pos] = r.oy[pos] + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.ow[pos];
+ r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w;
+ r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w;
}
- Float4 clipX = r.ox[pos];
- Float4 clipY = r.oy[pos];
+ Float4 clipX = r.o[pos].x;
+ Float4 clipY = r.o[pos].y;
if(state.multiSampling) // Clip at pixel edges instead of pixel centers
{
- clipX += *Pointer<Float4>(r.data + OFFSET(DrawData,offX)) * r.ow[pos];
- clipY += *Pointer<Float4>(r.data + OFFSET(DrawData,offY)) * r.ow[pos];
+ clipX += *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w;
+ clipY += *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w;
}
- Int4 maxX = CmpLT(r.ow[pos], clipX);
- Int4 maxY = CmpLT(r.ow[pos], clipY);
- Int4 maxZ = CmpLT(r.ow[pos], r.oz[pos]);
+ Int4 maxX = CmpLT(r.o[pos].w, clipX);
+ Int4 maxY = CmpLT(r.o[pos].w, clipY);
+ Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z);
- Int4 minX = CmpNLE(-r.ow[pos], clipX);
- Int4 minY = CmpNLE(-r.ow[pos], clipY);
- Int4 minZ = CmpNLE(Float4(0.0f, 0.0f, 0.0f, 0.0f), r.oz[pos]);
+ Int4 minX = CmpNLE(-r.o[pos].w, clipX);
+ Int4 minY = CmpNLE(-r.o[pos].w, clipY);
+ Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z);
Int flags;
@@ -159,9 +162,9 @@
flags = SignMask(minZ);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minZ) + flags * 4);
- Int4 finiteX = CmpLE(Abs(r.ox[pos]), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
- Int4 finiteY = CmpLE(Abs(r.oy[pos]), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
- Int4 finiteZ = CmpLE(Abs(r.oz[pos]), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
+ Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
+ Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
+ Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
flags = SignMask(finiteX & finiteY & finiteZ);
r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,fini) + flags * 4);
@@ -172,11 +175,11 @@
}
}
- Color4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
+ Vector4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
{
const bool texldl = state.shaderContainsTexldl;
- Color4f v;
+ Vector4f v;
Pointer<Byte> source0 = buffer + index * stride;
Pointer<Byte> source1 = source0 + (!texldl ? stride : 0);
@@ -341,8 +344,8 @@
transpose4x3(v.x, v.y, v.z, v.w);
- v.y *= Float4(1.0f / 0x00000400, 1.0f / 0x00000400, 1.0f / 0x00000400, 1.0f / 0x00000400);
- v.z *= Float4(1.0f / 0x00100000, 1.0f / 0x00100000, 1.0f / 0x00100000, 1.0f / 0x00100000);
+ v.y *= Float4(1.0f / 0x00000400);
+ v.z *= Float4(1.0f / 0x00100000);
}
break;
case STREAMTYPE_DEC3N:
@@ -390,9 +393,9 @@
transpose4x3(v.x, v.y, v.z, v.w);
- v.x *= Float4(1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f);
- v.y *= Float4(1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f);
- v.z *= Float4(1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f);
+ v.x *= Float4(1.0f / 0x00400000 / 511.0f);
+ v.y *= Float4(1.0f / 0x00400000 / 511.0f);
+ v.z *= Float4(1.0f / 0x00400000 / 511.0f);
}
break;
case STREAMTYPE_FIXED:
@@ -472,10 +475,10 @@
ASSERT(false);
}
- if(stream.count < 1) v.x = Float4(0.0f, 0.0f, 0.0f, 0.0f);
- if(stream.count < 2) v.y = Float4(0.0f, 0.0f, 0.0f, 0.0f);
- if(stream.count < 3) v.z = Float4(0.0f, 0.0f, 0.0f, 0.0f);
- if(stream.count < 4) v.w = Float4(1.0f, 1.0f, 1.0f, 1.0f);
+ if(stream.count < 1) v.x = Float4(0.0f);
+ if(stream.count < 2) v.y = Float4(0.0f);
+ if(stream.count < 3) v.z = Float4(0.0f);
+ if(stream.count < 4) v.w = Float4(1.0f);
return v;
}
@@ -484,55 +487,53 @@
{
int pos = state.positionRegister;
- if(state.postTransform && !state.preTransformed)
+ if(halfIntegerCoordinates)
{
- Float4 posScale = *Pointer<Float4>(r.data + OFFSET(DrawData,posScale)); // FIXME: Unpack
+ r.o[pos].x = r.o[pos].x - *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w;
+ r.o[pos].y = r.o[pos].y - *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w;
+ }
- r.ox[pos] = r.ox[pos] * posScale.x;
- r.oy[pos] = r.oy[pos] * posScale.y;
-
- Float4 posOffset = *Pointer<Float4>(r.data + OFFSET(DrawData,posOffset)); // FIXME: Unpack
-
- r.ox[pos] = r.ox[pos] + r.ow[pos] * posOffset.x;
- r.oy[pos] = r.oy[pos] + r.ow[pos] * posOffset.y;
+ if(symmetricNormalizedDepth)
+ {
+ r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f);
}
}
void VertexRoutine::writeCache(Pointer<Byte> &cacheLine, Registers &r)
{
- Color4f v;
+ Vector4f v;
for(int i = 0; i < 12; i++)
{
if(state.output[i].write)
{
- v.x = r.ox[i];
- v.y = r.oy[i];
- v.z = r.oz[i];
- v.w = r.ow[i];
+ v.x = r.o[i].x;
+ v.y = r.o[i].y;
+ v.z = r.o[i].z;
+ v.w = r.o[i].w;
if(state.output[i].xClamp)
{
- v.x = Max(v.x, Float4(0.0f, 0.0f, 0.0f, 0.0f));
- v.x = Min(v.x, Float4(1.0f, 1.0f, 1.0f, 1.0f));
+ v.x = Max(v.x, Float4(0.0f));
+ v.x = Min(v.x, Float4(1.0f));
}
if(state.output[i].yClamp)
{
- v.y = Max(v.y, Float4(0.0f, 0.0f, 0.0f, 0.0f));
- v.y = Min(v.y, Float4(1.0f, 1.0f, 1.0f, 1.0f));
+ v.y = Max(v.y, Float4(0.0f));
+ v.y = Min(v.y, Float4(1.0f));
}
if(state.output[i].zClamp)
{
- v.z = Max(v.z, Float4(0.0f, 0.0f, 0.0f, 0.0f));
- v.z = Min(v.z, Float4(1.0f, 1.0f, 1.0f, 1.0f));
+ v.z = Max(v.z, Float4(0.0f));
+ v.z = Min(v.z, Float4(1.0f));
}
if(state.output[i].wClamp)
{
- v.w = Max(v.w, Float4(0.0f, 0.0f, 0.0f, 0.0f));
- v.w = Min(v.w, Float4(1.0f, 1.0f, 1.0f, 1.0f));
+ v.w = Max(v.w, Float4(0.0f));
+ v.w = Min(v.w, Float4(1.0f));
}
if(state.output[i].write == 0x01)
@@ -568,16 +569,16 @@
int pos = state.positionRegister;
- v.x = r.ox[pos];
- v.y = r.oy[pos];
- v.z = r.oz[pos];
- v.w = r.ow[pos];
+ v.x = r.o[pos].x;
+ v.y = r.o[pos].y;
+ v.z = r.o[pos].z;
+ v.w = r.o[pos].w;
- Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0, 0, 0, 0))) & As<Int4>(Float4(1, 1, 1, 1))));
+ Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
Float4 rhw = Float4(1.0f) / w;
- v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,LLLLx16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,WWWWx16))));
- v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,TTTTx16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,HHHHx16))));
+ v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16))));
+ v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16))));
v.z = v.z * rhw;
v.w = rhw;