Update SwiftShader to April code dump. April code dump from TransGaming. Adds new shader compiler.
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp index f1ac5be..e1b735d 100644 --- a/src/Shader/PixelRoutine.cpp +++ b/src/Shader/PixelRoutine.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -12,7 +12,6 @@ #include "PixelRoutine.hpp" #include "Renderer.hpp" -#include "PixelShader.hpp" #include "QuadRasterizer.hpp" #include "Surface.hpp" #include "Primitive.hpp" @@ -30,17 +29,21 @@ extern bool complementaryDepthBuffer; extern bool postBlendSRGB; extern bool exactColorRounding; + extern bool booleanFaceRegister; + extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates + extern bool fullPixelPositionRegister; - PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *pixelShader) : Rasterizer(state), pixelShader(pixelShader) + PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : Rasterizer(state), shader(shader) { perturbate = false; luminance = false; previousScaling = false; - returns = false; ifDepth = 0; loopRepDepth = 0; breakDepth = 0; + currentLabel = -1; + whileTest = false; for(int i = 0; i < 2048; i++) { @@ -68,7 +71,7 @@ } const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); - const bool integerPipeline = pixelShaderVersion() <= 0x0104; + const bool integerPipeline = shaderVersion() <= 0x0104; Int zMask[4]; // Depth mask Int sMask[4]; // Stencil mask @@ -86,18 +89,19 @@ Float4 f; - Color4i &current = r.ri[0]; - Color4i &diffuse = r.vi[0]; - Color4i &specular = r.vi[1]; + Vector4i &current = r.ri[0]; + Vector4i &diffuse = r.vi[0]; + Vector4i &specular = r.vi[1]; Float4 (&z)[4] = r.z; + Float4 &w = r.w; Float4 &rhw = r.rhw; Float4 rhwCentroid; Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16); Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16); 
- if(state.depthTestActive || state.pixelFogActive()) + if(interpolateZ()) { for(unsigned int q = 0; q < state.multiSample; q++) { @@ -151,9 +155,10 @@ YYYY += yyyy; } - if(state.perspective) + if(interpolateW()) { - rhw = reciprocal(interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false)); + w = interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false); + rhw = reciprocal(w); if(state.centroid) { @@ -165,27 +170,15 @@ { for(int component = 0; component < 4; component++) { - Array<Float4> *pv; - - switch(component) - { - case 0: pv = &r.vx; break; - case 1: pv = &r.vy; break; - case 2: pv = &r.vz; break; - case 3: pv = &r.vw; break; - } - - Array<Float4> &v = *pv; - if(state.interpolant[interpolant].component & (1 << component)) { if(!state.interpolant[interpolant].centroid) { - v[interpolant] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive,V[interpolant][component]), state.interpolant[interpolant].flat & (1 << component), state.perspective); + r.vf[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive,V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); } else { - v[interpolant] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,V[interpolant][component]), state.interpolant[interpolant].flat & (1 << component), state.perspective); + r.vf[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); } } } @@ -197,19 +190,19 @@ case 0: break; case 1: - rcp = reciprocal(Float4(r.vy[interpolant])); - r.vx[interpolant] = r.vx[interpolant] * rcp; + rcp = reciprocal(r.vf[interpolant].y); + r.vf[interpolant].x = r.vf[interpolant].x * rcp; break; case 2: - rcp = reciprocal(Float4(r.vz[interpolant])); - 
r.vx[interpolant] = r.vx[interpolant] * rcp; - r.vy[interpolant] = r.vy[interpolant] * rcp; + rcp = reciprocal(r.vf[interpolant].z); + r.vf[interpolant].x = r.vf[interpolant].x * rcp; + r.vf[interpolant].y = r.vf[interpolant].y * rcp; break; case 3: - rcp = reciprocal(Float4(r.vw[interpolant])); - r.vx[interpolant] = r.vx[interpolant] * rcp; - r.vy[interpolant] = r.vy[interpolant] * rcp; - r.vz[interpolant] = r.vz[interpolant] * rcp; + rcp = reciprocal(r.vf[interpolant].w); + r.vf[interpolant].x = r.vf[interpolant].x * rcp; + r.vf[interpolant].y = r.vf[interpolant].y * rcp; + r.vf[interpolant].z = r.vf[interpolant].z * rcp; break; } } @@ -221,32 +214,47 @@ if(integerPipeline) { - if(state.color[0].component & 0x1) diffuse.x = convertFixed12(Float4(r.vx[0])); else diffuse.x = Short4(0x1000, 0x1000, 0x1000, 0x1000); - if(state.color[0].component & 0x2) diffuse.y = convertFixed12(Float4(r.vy[0])); else diffuse.y = Short4(0x1000, 0x1000, 0x1000, 0x1000); - if(state.color[0].component & 0x4) diffuse.z = convertFixed12(Float4(r.vz[0])); else diffuse.z = Short4(0x1000, 0x1000, 0x1000, 0x1000); - if(state.color[0].component & 0x8) diffuse.w = convertFixed12(Float4(r.vw[0])); else diffuse.w = Short4(0x1000, 0x1000, 0x1000, 0x1000); + if(state.color[0].component & 0x1) diffuse.x = convertFixed12(r.vf[0].x); else diffuse.x = Short4(0x1000); + if(state.color[0].component & 0x2) diffuse.y = convertFixed12(r.vf[0].y); else diffuse.y = Short4(0x1000); + if(state.color[0].component & 0x4) diffuse.z = convertFixed12(r.vf[0].z); else diffuse.z = Short4(0x1000); + if(state.color[0].component & 0x8) diffuse.w = convertFixed12(r.vf[0].w); else diffuse.w = Short4(0x1000); - if(state.color[1].component & 0x1) specular.x = convertFixed12(Float4(r.vx[1])); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); - if(state.color[1].component & 0x2) specular.y = convertFixed12(Float4(r.vy[1])); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); - if(state.color[1].component & 0x4) 
specular.z = convertFixed12(Float4(r.vz[1])); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); - if(state.color[1].component & 0x8) specular.w = convertFixed12(Float4(r.vw[1])); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000); + if(state.color[1].component & 0x1) specular.x = convertFixed12(r.vf[1].x); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); + if(state.color[1].component & 0x2) specular.y = convertFixed12(r.vf[1].y); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); + if(state.color[1].component & 0x4) specular.z = convertFixed12(r.vf[1].z); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); + if(state.color[1].component & 0x8) specular.w = convertFixed12(r.vf[1].w); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000); } - else if(pixelShaderVersion() >= 0x0300) + else if(shaderVersion() >= 0x0300) { - if(pixelShader->vPosDeclared) + if(shader->vPosDeclared) { - r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); - r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); + if(!halfIntegerCoordinates) + { + r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); + r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); + } + else + { + r.vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); + r.vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); + } + + if(fullPixelPositionRegister) + { + r.vPos.z = z[0]; // FIXME: Centroid? + r.vPos.w = w; // FIXME: Centroid? + } } - if(pixelShader->vFaceDeclared) + if(shader->vFaceDeclared) { Float4 area = *Pointer<Float>(r.primitive + OFFSET(Primitive,area)); - - r.vFace.x = area; - r.vFace.y = area; - r.vFace.z = area; - r.vFace.w = area; + Float4 face = booleanFaceRegister ? 
As<Float4>(CmpNLT(area, Float4(0.0f))) : area; + + r.vFace.x = face; + r.vFace.y = face; + r.vFace.z = face; + r.vFace.w = face; } } @@ -262,11 +270,11 @@ Long shaderTime = Ticks(); #endif - if(pixelShader) + if(shader) { - // pixelShader->print("PixelShader-%0.16llX.txt", state.shaderHash); + // shader->print("PixelShader-%0.8X.txt", state.shaderID); - if(pixelShader->getVersion() <= 0x0104) + if(shader->getVersion() <= 0x0104) { ps_1_x(r, cMask); } @@ -278,7 +286,7 @@ else { current = diffuse; - Color4i temp(0x0000, 0x0000, 0x0000, 0x0000); + Vector4i temp(0x0000, 0x0000, 0x0000, 0x0000); for(int stage = 0; stage < 8; stage++) { @@ -287,7 +295,7 @@ break; } - Color4i texture; + Vector4i texture; if(state.textureStage[stage].usesTexture) { @@ -306,10 +314,10 @@ if(integerPipeline) { - current.r = Min(current.r, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.r = Max(current.r, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - current.g = Min(current.g, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.g = Max(current.g, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - current.b = Min(current.b, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.b = Max(current.b, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - current.a = Min(current.a, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.a = Max(current.a, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + current.x = Min(current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.x = Max(current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + current.y = Min(current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.y = Max(current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + current.z = Min(current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.z = Max(current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + current.w = Min(current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.w = Max(current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); alphaPass = alphaTest(r, cMask, current); } @@ -320,7 +328,7 @@ alphaPass = alphaTest(r, cMask, 
r.oC[0]); } - if((pixelShader && pixelShader->containsTexkill()) || state.alphaTestActive()) + if((shader && shader->containsKill()) || state.alphaTestActive()) { for(unsigned int q = 0; q < state.multiSample; q++) { @@ -528,11 +536,11 @@ Float4 Z = z; - if(pixelShader && pixelShader->depthOverride()) + if(shader && shader->depthOverride()) { if(complementaryDepthBuffer) { - Z = Float4(1, 1, 1, 1) - r.oDepth; + Z = Float4(1.0f) - r.oDepth; } else { @@ -655,15 +663,15 @@ return zMask != 0; } - void PixelRoutine::blendTexture(Registers &r, Color4i &current, Color4i &temp, Color4i &texture, int stage) + void PixelRoutine::blendTexture(Registers &r, Vector4i &current, Vector4i &temp, Vector4i &texture, int stage) { - Color4i *arg1; - Color4i *arg2; - Color4i *arg3; - Color4i res; + Vector4i *arg1; + Vector4i *arg2; + Vector4i *arg3; + Vector4i res; - Color4i constant; - Color4i tfactor; + Vector4i constant; + Vector4i tfactor; const TextureStage::State &textureStage = state.textureStage[stage]; @@ -674,10 +682,10 @@ textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT || textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT) { - constant.r = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[0])); - constant.g = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[1])); - constant.b = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[2])); - constant.a = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[3])); + constant.x = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[0])); + constant.y = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[1])); + constant.z = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[2])); + constant.w = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[3])); } if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR || 
@@ -687,10 +695,10 @@ textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR || textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR) { - tfactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[0])); - tfactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[1])); - tfactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[2])); - tfactor.a = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3])); + tfactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[0])); + tfactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[1])); + tfactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[2])); + tfactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3])); } // Premodulate @@ -698,22 +706,22 @@ { if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE) { - current.r = MulHigh(current.r, texture.r) << 4; - current.g = MulHigh(current.g, texture.g) << 4; - current.b = MulHigh(current.b, texture.b) << 4; + current.x = MulHigh(current.x, texture.x) << 4; + current.y = MulHigh(current.y, texture.y) << 4; + current.z = MulHigh(current.z, texture.z) << 4; } if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE) { - current.a = MulHigh(current.a, texture.a) << 4; + current.w = MulHigh(current.w, texture.w) << 4; } } if(luminance) { - texture.r = MulHigh(texture.r, r.L) << 4; - texture.g = MulHigh(texture.g, r.L) << 4; - texture.b = MulHigh(texture.b, r.L) << 4; + texture.x = MulHigh(texture.x, r.L) << 4; + texture.y = MulHigh(texture.y, r.L) << 4; + texture.z = MulHigh(texture.z, r.L) << 4; luminance = false; } @@ -757,9 +765,9 @@ ASSERT(false); } - Color4i mod1; - Color4i mod2; - Color4i mod3; + Vector4i mod1; + Vector4i mod2; + Vector4i mod3; switch(textureStage.firstModifier) { @@ -767,30 +775,30 @@ break; case TextureStage::MODIFIER_INVCOLOR: { - mod1.r 
= SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->r); - mod1.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->g); - mod1.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->b); - mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); + mod1.x = SubSat(Short4(0x1000), arg1->x); + mod1.y = SubSat(Short4(0x1000), arg1->y); + mod1.z = SubSat(Short4(0x1000), arg1->z); + mod1.w = SubSat(Short4(0x1000), arg1->w); arg1 = &mod1; } break; case TextureStage::MODIFIER_ALPHA: { - mod1.r = arg1->a; - mod1.g = arg1->a; - mod1.b = arg1->a; - mod1.a = arg1->a; + mod1.x = arg1->w; + mod1.y = arg1->w; + mod1.z = arg1->w; + mod1.w = arg1->w; arg1 = &mod1; } break; case TextureStage::MODIFIER_INVALPHA: { - mod1.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); - mod1.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); - mod1.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); - mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); + mod1.x = SubSat(Short4(0x1000), arg1->w); + mod1.y = SubSat(Short4(0x1000), arg1->w); + mod1.z = SubSat(Short4(0x1000), arg1->w); + mod1.w = SubSat(Short4(0x1000), arg1->w); arg1 = &mod1; } @@ -805,30 +813,30 @@ break; case TextureStage::MODIFIER_INVCOLOR: { - mod2.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->r); - mod2.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->g); - mod2.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->b); - mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); + mod2.x = SubSat(Short4(0x1000), arg2->x); + mod2.y = SubSat(Short4(0x1000), arg2->y); + mod2.z = SubSat(Short4(0x1000), arg2->z); + mod2.w = SubSat(Short4(0x1000), arg2->w); arg2 = &mod2; } break; case TextureStage::MODIFIER_ALPHA: { - mod2.r = arg2->a; - mod2.g = arg2->a; - mod2.b = arg2->a; - mod2.a = arg2->a; + mod2.x = arg2->w; + mod2.y = arg2->w; + mod2.z = arg2->w; + mod2.w = arg2->w; arg2 = &mod2; } break; case TextureStage::MODIFIER_INVALPHA: { - mod2.r = 
SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); - mod2.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); - mod2.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); - mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); + mod2.x = SubSat(Short4(0x1000), arg2->w); + mod2.y = SubSat(Short4(0x1000), arg2->w); + mod2.z = SubSat(Short4(0x1000), arg2->w); + mod2.w = SubSat(Short4(0x1000), arg2->w); arg2 = &mod2; } @@ -843,30 +851,30 @@ break; case TextureStage::MODIFIER_INVCOLOR: { - mod3.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->r); - mod3.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->g); - mod3.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->b); - mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); + mod3.x = SubSat(Short4(0x1000), arg3->x); + mod3.y = SubSat(Short4(0x1000), arg3->y); + mod3.z = SubSat(Short4(0x1000), arg3->z); + mod3.w = SubSat(Short4(0x1000), arg3->w); arg3 = &mod3; } break; case TextureStage::MODIFIER_ALPHA: { - mod3.r = arg3->a; - mod3.g = arg3->a; - mod3.b = arg3->a; - mod3.a = arg3->a; + mod3.x = arg3->w; + mod3.y = arg3->w; + mod3.z = arg3->w; + mod3.w = arg3->w; arg3 = &mod3; } break; case TextureStage::MODIFIER_INVALPHA: { - mod3.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); - mod3.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); - mod3.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); - mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); + mod3.x = SubSat(Short4(0x1000), arg3->w); + mod3.y = SubSat(Short4(0x1000), arg3->w); + mod3.z = SubSat(Short4(0x1000), arg3->w); + mod3.w = SubSat(Short4(0x1000), arg3->w); arg3 = &mod3; } @@ -881,211 +889,211 @@ break; case TextureStage::STAGE_SELECTARG1: // Arg1 { - res.r = arg1->r; - res.g = arg1->g; - res.b = arg1->b; + res.x = arg1->x; + res.y = arg1->y; + res.z = arg1->z; } break; case TextureStage::STAGE_SELECTARG2: // Arg2 { - res.r = arg2->r; - res.g = 
arg2->g; - res.b = arg2->b; + res.x = arg2->x; + res.y = arg2->y; + res.z = arg2->z; } break; case TextureStage::STAGE_SELECTARG3: // Arg3 { - res.r = arg3->r; - res.g = arg3->g; - res.b = arg3->b; + res.x = arg3->x; + res.y = arg3->y; + res.z = arg3->z; } break; case TextureStage::STAGE_MODULATE: // Arg1 * Arg2 { - res.r = MulHigh(arg1->r, arg2->r) << 4; - res.g = MulHigh(arg1->g, arg2->g) << 4; - res.b = MulHigh(arg1->b, arg2->b) << 4; + res.x = MulHigh(arg1->x, arg2->x) << 4; + res.y = MulHigh(arg1->y, arg2->y) << 4; + res.z = MulHigh(arg1->z, arg2->z) << 4; } break; case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2 { - res.r = MulHigh(arg1->r, arg2->r) << 5; - res.g = MulHigh(arg1->g, arg2->g) << 5; - res.b = MulHigh(arg1->b, arg2->b) << 5; + res.x = MulHigh(arg1->x, arg2->x) << 5; + res.y = MulHigh(arg1->y, arg2->y) << 5; + res.z = MulHigh(arg1->z, arg2->z) << 5; } break; case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4 { - res.r = MulHigh(arg1->r, arg2->r) << 6; - res.g = MulHigh(arg1->g, arg2->g) << 6; - res.b = MulHigh(arg1->b, arg2->b) << 6; + res.x = MulHigh(arg1->x, arg2->x) << 6; + res.y = MulHigh(arg1->y, arg2->y) << 6; + res.z = MulHigh(arg1->z, arg2->z) << 6; } break; case TextureStage::STAGE_ADD: // Arg1 + Arg2 { - res.r = AddSat(arg1->r, arg2->r); - res.g = AddSat(arg1->g, arg2->g); - res.b = AddSat(arg1->b, arg2->b); + res.x = AddSat(arg1->x, arg2->x); + res.y = AddSat(arg1->y, arg2->y); + res.z = AddSat(arg1->z, arg2->z); } break; case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5 { - res.r = AddSat(arg1->r, arg2->r); - res.g = AddSat(arg1->g, arg2->g); - res.b = AddSat(arg1->b, arg2->b); + res.x = AddSat(arg1->x, arg2->x); + res.y = AddSat(arg1->y, arg2->y); + res.z = AddSat(arg1->z, arg2->z); - res.r = SubSat(res.r, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - res.g = SubSat(res.g, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - res.b = SubSat(res.b, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.x = SubSat(res.x, Short4(0x0800, 
0x0800, 0x0800, 0x0800)); + res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); } break; case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1 { - res.r = AddSat(arg1->r, arg2->r); - res.g = AddSat(arg1->g, arg2->g); - res.b = AddSat(arg1->b, arg2->b); + res.x = AddSat(arg1->x, arg2->x); + res.y = AddSat(arg1->y, arg2->y); + res.z = AddSat(arg1->z, arg2->z); - res.r = SubSat(res.r, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - res.g = SubSat(res.g, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - res.b = SubSat(res.b, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - res.r = AddSat(res.r, res.r); - res.g = AddSat(res.g, res.g); - res.b = AddSat(res.b, res.b); + res.x = AddSat(res.x, res.x); + res.y = AddSat(res.y, res.y); + res.z = AddSat(res.z, res.z); } break; case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2 { - res.r = SubSat(arg1->r, arg2->r); - res.g = SubSat(arg1->g, arg2->g); - res.b = SubSat(arg1->b, arg2->b); + res.x = SubSat(arg1->x, arg2->x); + res.y = SubSat(arg1->y, arg2->y); + res.z = SubSat(arg1->z, arg2->z); } break; case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2 { Short4 tmp; - tmp = MulHigh(arg1->r, arg2->r) << 4; res.r = AddSat(arg1->r, arg2->r); res.r = SubSat(res.r, tmp); - tmp = MulHigh(arg1->g, arg2->g) << 4; res.g = AddSat(arg1->g, arg2->g); res.g = SubSat(res.g, tmp); - tmp = MulHigh(arg1->b, arg2->b) << 4; res.b = AddSat(arg1->b, arg2->b); res.b = SubSat(res.b, tmp); + tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp); + tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp); + tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = 
SubSat(res.z, tmp); } break; case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2 { - res.r = MulHigh(arg1->r, arg2->r) << 4; res.r = AddSat(res.r, arg3->r); - res.g = MulHigh(arg1->g, arg2->g) << 4; res.g = AddSat(res.g, arg3->g); - res.b = MulHigh(arg1->b, arg2->b) << 4; res.b = AddSat(res.b, arg3->b); + res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x); + res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y); + res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z); } break; case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2 { - res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, arg3->r) << 4; res.r = AddSat(res.r, arg2->r); - res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, arg3->g) << 4; res.g = AddSat(res.g, arg2->g); - res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, arg3->b) << 4; res.b = AddSat(res.b, arg2->b); + res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x); + res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y); + res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z); } break; - case TextureStage::STAGE_DOT3: // 2 * (Arg1.r - 0.5) * 2 * (Arg2.r - 0.5) + 2 * (Arg1.g - 0.5) * 2 * (Arg2.g - 0.5) + 2 * (Arg1.b - 0.5) * 2 * (Arg2.b - 0.5) + case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5) { Short4 tmp; - res.r = SubSat(arg1->r, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->r, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.r = MulHigh(res.r, tmp); - res.g = SubSat(arg1->g, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->g, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.g = MulHigh(res.g, tmp); - res.b = SubSat(arg1->b, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->b, 
Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.b = MulHigh(res.b, tmp); + res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp); + res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp); + res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp); - res.r = res.r << 6; - res.g = res.g << 6; - res.b = res.b << 6; + res.x = res.x << 6; + res.y = res.y << 6; + res.z = res.z << 6; - res.r = AddSat(res.r, res.g); - res.r = AddSat(res.r, res.b); + res.x = AddSat(res.x, res.y); + res.x = AddSat(res.x, res.z); // Clamp to [0, 1] - res.r = Max(res.r, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - res.r = Min(res.r, Short4(0x1000, 0x1000, 0x1000, 0x1000)); + res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + res.x = Min(res.x, Short4(0x1000)); - res.g = res.r; - res.b = res.r; - res.a = res.r; + res.y = res.x; + res.z = res.x; + res.w = res.x; } break; case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2 { - res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, current.a) << 4; res.r = AddSat(res.r, arg2->r); - res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, current.a) << 4; res.g = AddSat(res.g, arg2->g); - res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, current.a) << 4; res.b = AddSat(res.b, arg2->b); + res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x); + res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y); + res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z); } break; case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2 { - res.r = SubSat(arg1->r, 
arg2->r); res.r = MulHigh(res.r, r.diffuse.a) << 4; res.r = AddSat(res.r, arg2->r); - res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, r.diffuse.a) << 4; res.g = AddSat(res.g, arg2->g); - res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, r.diffuse.a) << 4; res.b = AddSat(res.b, arg2->b); + res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.diffuse.w) << 4; res.x = AddSat(res.x, arg2->x); + res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.diffuse.w) << 4; res.y = AddSat(res.y, arg2->y); + res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.diffuse.w) << 4; res.z = AddSat(res.z, arg2->z); } break; case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2 { - res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.r = AddSat(res.r, arg2->r); - res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.g = AddSat(res.g, arg2->g); - res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.b = AddSat(res.b, arg2->b); + res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x); + res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y); + res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z); } break; case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2 { - res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, texture.a) << 4; res.r = AddSat(res.r, arg2->r); - res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, texture.a) << 4; res.g 
= AddSat(res.g, arg2->g); - res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, texture.a) << 4; res.b = AddSat(res.b, arg2->b); + res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x); + res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y); + res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z); } break; case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha) { - res.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.r = MulHigh(res.r, arg2->r) << 4; res.r = AddSat(res.r, arg1->r); - res.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.g = MulHigh(res.g, arg2->g) << 4; res.g = AddSat(res.g, arg1->g); - res.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.b = MulHigh(res.b, arg2->b) << 4; res.b = AddSat(res.b, arg1->b); + res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x); + res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y); + res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z); } break; case TextureStage::STAGE_PREMODULATE: { - res.r = arg1->r; - res.g = arg1->g; - res.b = arg1->b; + res.x = arg1->x; + res.y = arg1->y; + res.z = arg1->z; } break; - case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.a * Arg2 + case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2 { - res.r = MulHigh(arg1->a, arg2->r) << 4; res.r = AddSat(res.r, arg1->r); - res.g = MulHigh(arg1->a, arg2->g) << 4; res.g = AddSat(res.g, arg1->g); - res.b = MulHigh(arg1->a, arg2->b) << 4; res.b = AddSat(res.b, arg1->b); + res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x); + res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y); 
+ res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z); } break; - case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.a + case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w { - res.r = MulHigh(arg1->r, arg2->r) << 4; res.r = AddSat(res.r, arg1->a); - res.g = MulHigh(arg1->g, arg2->g) << 4; res.g = AddSat(res.g, arg1->a); - res.b = MulHigh(arg1->b, arg2->b) << 4; res.b = AddSat(res.b, arg1->a); + res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w); + res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w); + res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w); } break; - case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.a) * Arg2 + Arg1 + case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1 { Short4 tmp; - res.r = AddSat(arg1->r, arg2->r); tmp = MulHigh(arg1->a, arg2->r) << 4; res.r = SubSat(res.r, tmp); - res.g = AddSat(arg1->g, arg2->g); tmp = MulHigh(arg1->a, arg2->g) << 4; res.g = SubSat(res.g, tmp); - res.b = AddSat(arg1->b, arg2->b); tmp = MulHigh(arg1->a, arg2->b) << 4; res.b = SubSat(res.b, tmp); + res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp); + res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp); + res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp); } break; - case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.a + case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w { Short4 tmp; - res.r = AddSat(arg1->a, arg2->r); tmp = MulHigh(arg1->r, arg2->r) << 4; res.r = SubSat(res.r, tmp); - res.g = AddSat(arg1->a, arg2->g); tmp = MulHigh(arg1->g, arg2->g) << 4; res.g = SubSat(res.g, tmp); - res.b = AddSat(arg1->a, arg2->b); tmp = MulHigh(arg1->b, arg2->b) << 4; res.b = SubSat(res.b, tmp); + res.x = 
AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp); + res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp); + res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp); } break; case TextureStage::STAGE_BUMPENVMAP: { - r.du = Float4(texture.r) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0); - r.dv = Float4(texture.g) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0); + r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0); + r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0); Float4 du2; Float4 dv2; @@ -1101,16 +1109,16 @@ perturbate = true; - res.r = r.current.r; - res.g = r.current.g; - res.b = r.current.b; - res.a = r.current.a; + res.x = r.current.x; + res.y = r.current.y; + res.z = r.current.z; + res.w = r.current.w; } break; case TextureStage::STAGE_BUMPENVMAPLUMINANCE: { - r.du = Float4(texture.r) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0); - r.dv = Float4(texture.g) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0); + r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0); + r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0); Float4 du2; Float4 dv2; @@ -1127,19 +1135,19 @@ perturbate = true; - r.L = texture.b; + r.L = texture.z; r.L = MulHigh(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4))); r.L = r.L << 4; r.L = AddSat(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4))); r.L = Max(r.L, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - r.L = Min(r.L, Short4(0x1000, 0x1000, 0x1000, 0x1000)); + r.L = Min(r.L, Short4(0x1000)); luminance = true; - res.r = r.current.r; - res.g = r.current.g; - res.b = r.current.b; - res.a = r.current.a; + res.x = r.current.x; + res.y = r.current.y; + res.z = r.current.z; + res.w = r.current.w; } break; default: @@ -1193,7 +1201,7 @@ break; case 
TextureStage::MODIFIER_INVCOLOR: { - mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); + mod1.w = SubSat(Short4(0x1000), arg1->w); arg1 = &mod1; } @@ -1205,7 +1213,7 @@ break; case TextureStage::MODIFIER_INVALPHA: { - mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a); + mod1.w = SubSat(Short4(0x1000), arg1->w); arg1 = &mod1; } @@ -1220,7 +1228,7 @@ break; case TextureStage::MODIFIER_INVCOLOR: { - mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); + mod2.w = SubSat(Short4(0x1000), arg2->w); arg2 = &mod2; } @@ -1232,7 +1240,7 @@ break; case TextureStage::MODIFIER_INVALPHA: { - mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a); + mod2.w = SubSat(Short4(0x1000), arg2->w); arg2 = &mod2; } @@ -1247,7 +1255,7 @@ break; case TextureStage::MODIFIER_INVCOLOR: { - mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); + mod3.w = SubSat(Short4(0x1000), arg3->w); arg3 = &mod3; } @@ -1259,7 +1267,7 @@ break; case TextureStage::MODIFIER_INVALPHA: { - mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a); + mod3.w = SubSat(Short4(0x1000), arg3->w); arg3 = &mod3; } @@ -1274,104 +1282,104 @@ break; case TextureStage::STAGE_SELECTARG1: // Arg1 { - res.a = arg1->a; + res.w = arg1->w; } break; case TextureStage::STAGE_SELECTARG2: // Arg2 { - res.a = arg2->a; + res.w = arg2->w; } break; case TextureStage::STAGE_SELECTARG3: // Arg3 { - res.a = arg3->a; + res.w = arg3->w; } break; case TextureStage::STAGE_MODULATE: // Arg1 * Arg2 { - res.a = MulHigh(arg1->a, arg2->a) << 4; + res.w = MulHigh(arg1->w, arg2->w) << 4; } break; case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2 { - res.a = MulHigh(arg1->a, arg2->a) << 5; + res.w = MulHigh(arg1->w, arg2->w) << 5; } break; case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4 { - res.a = MulHigh(arg1->a, arg2->a) << 6; + res.w = MulHigh(arg1->w, arg2->w) << 6; } break; case TextureStage::STAGE_ADD: // Arg1 + Arg2 { - res.a = AddSat(arg1->a, arg2->a); + 
res.w = AddSat(arg1->w, arg2->w); } break; case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5 { - res.a = AddSat(arg1->a, arg2->a); - res.a = SubSat(res.a, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.w = AddSat(arg1->w, arg2->w); + res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); } break; case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1 { - res.a = AddSat(arg1->a, arg2->a); - res.a = SubSat(res.a, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - res.a = AddSat(res.a, res.a); + res.w = AddSat(arg1->w, arg2->w); + res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + res.w = AddSat(res.w, res.w); } break; case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2 { - res.a = SubSat(arg1->a, arg2->a); + res.w = SubSat(arg1->w, arg2->w); } break; case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2 { Short4 tmp; - tmp = MulHigh(arg1->a, arg2->a) << 4; res.a = AddSat(arg1->a, arg2->a); res.a = SubSat(res.a, tmp); + tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp); } break; case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2 { - res.a = MulHigh(arg1->a, arg2->a) << 4; res.a = AddSat(res.a, arg3->a); + res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w); } break; case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2 { - res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, arg3->a) << 4; res.a = AddSat(res.a, arg2->a); + res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w); } break; case TextureStage::STAGE_DOT3: break; // Already computed in color channel case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2 { - res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, current.a) << 4; res.a = AddSat(res.a, arg2->a); + res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w); } break; case 
TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha) { - res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, r.diffuse.a) << 4; res.a = AddSat(res.a, arg2->a); + res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.diffuse.w) << 4; res.w = AddSat(res.w, arg2->w); } break; case TextureStage::STAGE_BLENDFACTORALPHA: { - res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.a = AddSat(res.a, arg2->a); + res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w); } break; case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha) { - res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, texture.a) << 4; res.a = AddSat(res.a, arg2->a); + res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w); } break; case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha) { - res.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.a = MulHigh(res.a, arg2->a) << 4; res.a = AddSat(res.a, arg1->a); + res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w); } break; case TextureStage::STAGE_PREMODULATE: { - res.a = arg1->a; + res.w = arg1->w; } break; case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: @@ -1421,9 +1429,9 @@ case TextureStage::STAGE_ADDSIGNED2X: case TextureStage::STAGE_SUBTRACT: case TextureStage::STAGE_ADDSMOOTH: - res.r = Max(res.r, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - res.g = Max(res.g, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - res.b = Max(res.b, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); break; 
default: ASSERT(false); @@ -1462,7 +1470,7 @@ case TextureStage::STAGE_ADDSIGNED2X: case TextureStage::STAGE_SUBTRACT: case TextureStage::STAGE_ADDSMOOTH: - res.a = Max(res.a, Short4(0x0000, 0x0000, 0x0000, 0x0000)); + res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); break; default: ASSERT(false); @@ -1498,9 +1506,9 @@ case TextureStage::STAGE_BLENDTEXTUREALPHAPM: case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: - res.r = Min(res.r, Short4(0x1000, 0x1000, 0x1000, 0x1000)); - res.g = Min(res.g, Short4(0x1000, 0x1000, 0x1000, 0x1000)); - res.b = Min(res.b, Short4(0x1000, 0x1000, 0x1000, 0x1000)); + res.x = Min(res.x, Short4(0x1000)); + res.y = Min(res.y, Short4(0x1000)); + res.z = Min(res.z, Short4(0x1000)); break; default: ASSERT(false); @@ -1536,7 +1544,7 @@ case TextureStage::STAGE_BLENDTEXTUREALPHAPM: case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: - res.a = Min(res.a, Short4(0x1000, 0x1000, 0x1000, 0x1000)); + res.w = Min(res.w, Short4(0x1000)); break; default: ASSERT(false); @@ -1545,16 +1553,16 @@ switch(textureStage.destinationArgument) { case TextureStage::DESTINATION_CURRENT: - current.r = res.r; - current.g = res.g; - current.b = res.b; - current.a = res.a; + current.x = res.x; + current.y = res.y; + current.z = res.z; + current.w = res.w; break; case TextureStage::DESTINATION_TEMP: - temp.r = res.r; - temp.g = res.g; - temp.b = res.b; - temp.a = res.a; + temp.x = res.x; + temp.y = res.y; + temp.z = res.z; + temp.w = res.w; break; default: ASSERT(false); @@ -1623,7 +1631,7 @@ cMask[3] &= aMask3; } - Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Color4i ¤t) + Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Vector4i ¤t) { if(!state.alphaTestActive()) { @@ -1634,7 +1642,7 @@ if(state.transparencyAntialiasing == Context::TRANSPARENCY_NONE) { - alphaTest(r, aMask, current.a); + alphaTest(r, aMask, current.w); for(unsigned int q = 0; 
q < state.multiSample; q++) { @@ -1643,7 +1651,7 @@ } else if(state.transparencyAntialiasing == Context::TRANSPARENCY_ALPHA_TO_COVERAGE) { - Float4 alpha = Float4(current.a) * Float4(1.0f / 0x1000); + Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000); alphaToCoverage(r, cMask, alpha); } @@ -1659,7 +1667,7 @@ return pass != 0x0; } - Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Color4f &c0) + Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Vector4f &c0) { if(!state.alphaTestActive()) { @@ -1670,7 +1678,7 @@ if(state.transparencyAntialiasing == Context::TRANSPARENCY_NONE) { - Short4 alpha = RoundShort4(c0.a * Float4(0x1000, 0x1000, 0x1000, 0x1000)); + Short4 alpha = RoundShort4(c0.w * Float4(0x1000)); alphaTest(r, aMask, alpha); @@ -1681,7 +1689,7 @@ } else if(state.transparencyAntialiasing == Context::TRANSPARENCY_ALPHA_TO_COVERAGE) { - alphaToCoverage(r, cMask, c0.a); + alphaToCoverage(r, cMask, c0.w); } else ASSERT(false); @@ -1695,7 +1703,7 @@ return pass != 0x0; } - void PixelRoutine::fogBlend(Registers &r, Color4i ¤t, Float4 &f, Float4 &z, Float4 &rhw) + void PixelRoutine::fogBlend(Registers &r, Vector4i ¤t, Float4 &f, Float4 &z, Float4 &rhw) { if(!state.fogActive) { @@ -1709,18 +1717,18 @@ UShort4 fog = convertFixed16(f, true); - current.r = As<Short4>(MulHigh(As<UShort4>(current.r), fog)); - current.g = As<Short4>(MulHigh(As<UShort4>(current.g), fog)); - current.b = As<Short4>(MulHigh(As<UShort4>(current.b), fog)); + current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog)); + current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog)); + current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog)); - UShort4 invFog = UShort4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - fog; + UShort4 invFog = UShort4(0xFFFFu) - fog; - current.r += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[0])))); - current.g += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[1])))); - current.b += 
As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[2])))); + current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[0])))); + current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[1])))); + current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[2])))); } - void PixelRoutine::fogBlend(Registers &r, Color4f &c0, Float4 &fog, Float4 &z, Float4 &rhw) + void PixelRoutine::fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw) { if(!state.fogActive) { @@ -1731,21 +1739,21 @@ { pixelFog(r, fog, z, rhw); - fog = Min(fog, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - fog = Max(fog, Float4(0.0f, 0.0f, 0.0f, 0.0f)); + fog = Min(fog, Float4(1.0f)); + fog = Max(fog, Float4(0.0f)); } - c0.r -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0])); - c0.g -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1])); - c0.b -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2])); + c0.x -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0])); + c0.y -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1])); + c0.z -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2])); - c0.r *= fog; - c0.g *= fog; - c0.b *= fog; + c0.x *= fog; + c0.y *= fog; + c0.z *= fog; - c0.r += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0])); - c0.g += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1])); - c0.b += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2])); + c0.x += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0])); + c0.y += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1])); + c0.z += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2])); } void PixelRoutine::pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw) @@ -1762,7 +1770,7 @@ { if(complementaryDepthBuffer) { - zw = Float4(1.0f, 1.0f, 1.0f, 1.0f) - z; + zw = Float4(1.0f) - z; } else { @@ -1781,12 +1789,12 @@ 
break; case Context::FOG_EXP: zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE)); - zw = exponential(zw, true); + zw = exponential2(zw, true); break; case Context::FOG_EXP2: zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE2)); zw *= zw; - zw = exponential(zw, true); + zw = exponential2(zw, true); zw = Rcp_pp(zw); break; default: @@ -1794,16 +1802,16 @@ } } - void PixelRoutine::specularPixel(Color4i ¤t, Color4i &specular) + void PixelRoutine::specularPixel(Vector4i ¤t, Vector4i &specular) { if(!state.specularAdd) { return; } - current.r = AddSat(current.r, specular.r); - current.g = AddSat(current.g, specular.g); - current.b = AddSat(current.b, specular.b); + current.x = AddSat(current.x, specular.x); + current.y = AddSat(current.y, specular.y); + current.z = AddSat(current.z, specular.z); } void PixelRoutine::writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask) @@ -1815,11 +1823,11 @@ Float4 Z = z; - if(pixelShader && pixelShader->depthOverride()) + if(shader && shader->depthOverride()) { if(complementaryDepthBuffer) { - Z = Float4(1, 1, 1, 1) - r.oDepth; + Z = Float4(1.0f) - r.oDepth; } else { @@ -2009,12 +2017,12 @@ } } - void PixelRoutine::sampleTexture(Registers &r, Color4i &c, int coordinates, int stage, bool project) + void PixelRoutine::sampleTexture(Registers &r, Vector4i &c, int coordinates, int stage, bool project) { - Float4 u = r.vx[2 + coordinates]; - Float4 v = r.vy[2 + coordinates]; - Float4 w = r.vz[2 + coordinates]; - Float4 q = r.vw[2 + coordinates]; + Float4 u = r.vf[2 + coordinates].x; + Float4 v = r.vf[2 + coordinates].y; + Float4 w = r.vf[2 + coordinates].z; + Float4 q = r.vf[2 + coordinates].w; if(perturbate) { @@ -2027,15 +2035,15 @@ sampleTexture(r, c, stage, u, v, w, q, project); } - void PixelRoutine::sampleTexture(Registers &r, Color4i &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project, bool bias, bool fixed12) + void PixelRoutine::sampleTexture(Registers &r, 
Vector4i &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project, bool bias, bool fixed12) { - Color4f dsx; - Color4f dsy; + Vector4f dsx; + Vector4f dsy; sampleTexture(r, c, stage, u, v, w, q, dsx, dsy, project, bias, fixed12, false); } - void PixelRoutine::sampleTexture(Registers &r, Color4i &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool project, bool bias, bool fixed12, bool gradients, bool lodProvided) + void PixelRoutine::sampleTexture(Registers &r, Vector4i &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool fixed12, bool gradients, bool lodProvided) { #if PERF_PROFILE Long texTime = Ticks(); @@ -2063,7 +2071,31 @@ #endif } - void PixelRoutine::sampleTexture(Registers &r, Color4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool project, bool bias, bool gradients, bool lodProvided) + void PixelRoutine::sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided) + { + if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) + { + sampleTexture(r, c, sampler.index, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided); + } + else + { + Int index = As<Int>(Float(reg(r, sampler).x.x)); + + for(int i = 0; i < 16; i++) + { + if(shader->usesSampler(i)) + { + If(index == i) + { + sampleTexture(r, c, i, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided); + // FIXME: When the sampler states are the same, we could use one sampler and just index the texture + } + } + } + } + } + + void PixelRoutine::sampleTexture(Registers &r, Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided) { #if PERF_PROFILE Long texTime = 
Ticks(); @@ -2091,7 +2123,7 @@ #endif } - void PixelRoutine::clampColor(Color4f oC[4]) + void PixelRoutine::clampColor(Vector4f oC[4]) { for(int index = 0; index < 4; index++) { @@ -2108,10 +2140,10 @@ case FORMAT_A8R8G8B8: case FORMAT_X8R8G8B8: case FORMAT_G16R16: - oC[index].r = Max(oC[index].r, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].r = Min(oC[index].r, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - oC[index].g = Max(oC[index].g, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].g = Min(oC[index].g, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - oC[index].b = Max(oC[index].b, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].b = Min(oC[index].b, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - oC[index].a = Max(oC[index].a, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].a = Min(oC[index].a, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); + oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); + oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); + oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); break; case FORMAT_R32F: case FORMAT_G32R32F: @@ -2123,14 +2155,14 @@ } } - void PixelRoutine::rasterOperation(Color4i ¤t, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) + void PixelRoutine::rasterOperation(Vector4i ¤t, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) { if(!state.colorWriteActive(0)) { return; } - Color4f oC; + Vector4f oC; switch(state.targetFormat[0]) { @@ -2144,10 +2176,10 @@ } else { - current.r <<= 4; - current.g <<= 4; - current.b <<= 4; - current.a <<= 4; + current.x <<= 4; + current.y <<= 4; + current.z <<= 4; + current.w <<= 4; } fogBlend(r, current, fog, r.z[0], r.rhw); @@ -2155,7 +2187,7 @@ for(unsigned int q = 0; q < state.multiSample; q++) { Pointer<Byte> buffer = cBuffer + q * 
*Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0])); - Color4i color = current; + Vector4i color = current; if(state.multiSampleMask & (1 << q)) { @@ -2173,7 +2205,7 @@ for(unsigned int q = 0; q < state.multiSample; q++) { Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0])); - Color4f color = oC; + Vector4f color = oC; if(state.multiSampleMask & (1 << q)) { @@ -2187,7 +2219,7 @@ } } - void PixelRoutine::rasterOperation(Color4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) + void PixelRoutine::rasterOperation(Vector4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) { for(int index = 0; index < 4; index++) { @@ -2198,9 +2230,9 @@ if(!postBlendSRGB && state.writeSRGB) { - oC[index].r = linearToSRGB(oC[index].r); - oC[index].g = linearToSRGB(oC[index].g); - oC[index].b = linearToSRGB(oC[index].b); + oC[index].x = linearToSRGB(oC[index].x); + oC[index].y = linearToSRGB(oC[index].y); + oC[index].z = linearToSRGB(oC[index].z); } if(index == 0) @@ -2217,12 +2249,12 @@ for(unsigned int q = 0; q < state.multiSample; q++) { Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index])); - Color4i color; + Vector4i color; - color.r = convertFixed16(oC[index].r, false); - color.g = convertFixed16(oC[index].g, false); - color.b = convertFixed16(oC[index].b, false); - color.a = convertFixed16(oC[index].a, false); + color.x = convertFixed16(oC[index].x, false); + color.y = convertFixed16(oC[index].y, false); + color.z = convertFixed16(oC[index].z, false); + color.w = convertFixed16(oC[index].w, false); if(state.multiSampleMask & (1 << q)) { @@ -2237,7 +2269,7 @@ for(unsigned int q = 0; q < state.multiSample; q++) { Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index])); - Color4f color = oC[index]; + Vector4f color = 
oC[index]; if(state.multiSampleMask & (1 << q)) { @@ -2252,7 +2284,7 @@ } } - void PixelRoutine::blendFactor(Registers &r, const Color4i &blendFactor, const Color4i ¤t, const Color4i &pixel, Context::BlendFactor blendFactorActive) + void PixelRoutine::blendFactor(Registers &r, const Vector4i &blendFactor, const Vector4i ¤t, const Vector4i &pixel, Context::BlendFactor blendFactorActive) { switch(blendFactorActive) { @@ -2263,77 +2295,77 @@ // Optimized break; case Context::BLEND_SOURCE: - blendFactor.r = current.r; - blendFactor.g = current.g; - blendFactor.b = current.b; + blendFactor.x = current.x; + blendFactor.y = current.y; + blendFactor.z = current.z; break; case Context::BLEND_INVSOURCE: - blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.r; - blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.g; - blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.b; + blendFactor.x = Short4(0xFFFFu) - current.x; + blendFactor.y = Short4(0xFFFFu) - current.y; + blendFactor.z = Short4(0xFFFFu) - current.z; break; case Context::BLEND_DEST: - blendFactor.r = pixel.r; - blendFactor.g = pixel.g; - blendFactor.b = pixel.b; + blendFactor.x = pixel.x; + blendFactor.y = pixel.y; + blendFactor.z = pixel.z; break; case Context::BLEND_INVDEST: - blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.r; - blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.g; - blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.b; + blendFactor.x = Short4(0xFFFFu) - pixel.x; + blendFactor.y = Short4(0xFFFFu) - pixel.y; + blendFactor.z = Short4(0xFFFFu) - pixel.z; break; case Context::BLEND_SOURCEALPHA: - blendFactor.r = current.a; - blendFactor.g = current.a; - blendFactor.b = current.a; + blendFactor.x = current.w; + blendFactor.y = current.w; + blendFactor.z = current.w; break; case Context::BLEND_INVSOURCEALPHA: - blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a; - blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 
0xFFFF) - current.a; - blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a; + blendFactor.x = Short4(0xFFFFu) - current.w; + blendFactor.y = Short4(0xFFFFu) - current.w; + blendFactor.z = Short4(0xFFFFu) - current.w; break; case Context::BLEND_DESTALPHA: - blendFactor.r = pixel.a; - blendFactor.g = pixel.a; - blendFactor.b = pixel.a; + blendFactor.x = pixel.w; + blendFactor.y = pixel.w; + blendFactor.z = pixel.w; break; case Context::BLEND_INVDESTALPHA: - blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a; - blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a; - blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a; + blendFactor.x = Short4(0xFFFFu) - pixel.w; + blendFactor.y = Short4(0xFFFFu) - pixel.w; + blendFactor.z = Short4(0xFFFFu) - pixel.w; break; case Context::BLEND_SRCALPHASAT: - blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a; - blendFactor.r = Min(As<UShort4>(blendFactor.r), As<UShort4>(current.a)); - blendFactor.g = blendFactor.r; - blendFactor.b = blendFactor.r; + blendFactor.x = Short4(0xFFFFu) - pixel.w; + blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); + blendFactor.y = blendFactor.x; + blendFactor.z = blendFactor.x; break; case Context::BLEND_CONSTANT: - blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[0])); - blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[1])); - blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[2])); + blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[0])); + blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[1])); + blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[2])); break; case Context::BLEND_INVCONSTANT: - blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[0])); - blendFactor.g = *Pointer<Short4>(r.data + 
OFFSET(DrawData,factor.invBlendConstant4W[1])); - blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2])); + blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[0])); + blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[1])); + blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2])); break; case Context::BLEND_CONSTANTALPHA: - blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); - blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); - blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); break; case Context::BLEND_INVCONSTANTALPHA: - blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); break; default: ASSERT(false); } } - void PixelRoutine::blendFactorAlpha(Registers &r, const Color4i &blendFactor, const Color4i ¤t, const Color4i &pixel, Context::BlendFactor blendFactorAlphaActive) + void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4i &blendFactor, const Vector4i ¤t, const Vector4i &pixel, Context::BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) { @@ 
-2344,46 +2376,46 @@ // Optimized break; case Context::BLEND_SOURCE: - blendFactor.a = current.a; + blendFactor.w = current.w; break; case Context::BLEND_INVSOURCE: - blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a; + blendFactor.w = Short4(0xFFFFu) - current.w; break; case Context::BLEND_DEST: - blendFactor.a = pixel.a; + blendFactor.w = pixel.w; break; case Context::BLEND_INVDEST: - blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a; + blendFactor.w = Short4(0xFFFFu) - pixel.w; break; case Context::BLEND_SOURCEALPHA: - blendFactor.a = current.a; + blendFactor.w = current.w; break; case Context::BLEND_INVSOURCEALPHA: - blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a; + blendFactor.w = Short4(0xFFFFu) - current.w; break; case Context::BLEND_DESTALPHA: - blendFactor.a = pixel.a; + blendFactor.w = pixel.w; break; case Context::BLEND_INVDESTALPHA: - blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a; + blendFactor.w = Short4(0xFFFFu) - pixel.w; break; case Context::BLEND_SRCALPHASAT: - blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + blendFactor.w = Short4(0xFFFFu); break; case Context::BLEND_CONSTANT: case Context::BLEND_CONSTANTALPHA: - blendFactor.a = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); break; case Context::BLEND_INVCONSTANT: case Context::BLEND_INVCONSTANTALPHA: - blendFactor.a = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); break; default: ASSERT(false); } } - void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Color4i ¤t, Int &x) + void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4i ¤t, Int &x) { if(!state.alphaBlendActive) { @@ -2392,7 +2424,7 @@ Pointer<Byte> buffer; - Color4i pixel; + 
Vector4i pixel; Short4 c01; Short4 c23; @@ -2404,74 +2436,74 @@ c01 = *Pointer<Short4>(buffer); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); c23 = *Pointer<Short4>(buffer); - pixel.b = c01; - pixel.g = c01; - pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(c23)); - pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(c23)); - pixel.r = pixel.b; - pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.g)); - pixel.r = UnpackHigh(As<Byte8>(pixel.r), As<Byte8>(pixel.g)); - pixel.g = pixel.b; - pixel.a = pixel.r; - pixel.r = UnpackLow(As<Byte8>(pixel.r), As<Byte8>(pixel.r)); - pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(pixel.g)); - pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.b)); - pixel.a = UnpackHigh(As<Byte8>(pixel.a), As<Byte8>(pixel.a)); + pixel.z = c01; + pixel.y = c01; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); + pixel.x = pixel.z; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); + pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); + pixel.y = pixel.z; + pixel.w = pixel.x; + pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); + pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); break; case FORMAT_X8R8G8B8: buffer = cBuffer + 4 * x; c01 = *Pointer<Short4>(buffer); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); c23 = *Pointer<Short4>(buffer); - pixel.b = c01; - pixel.g = c01; - pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(c23)); - pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(c23)); - pixel.r = pixel.b; - pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.g)); - pixel.r = UnpackHigh(As<Byte8>(pixel.r), As<Byte8>(pixel.g)); - pixel.g = pixel.b; - pixel.r = UnpackLow(As<Byte8>(pixel.r), As<Byte8>(pixel.r)); - pixel.g = 
UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(pixel.g)); - pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.b)); - pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + pixel.z = c01; + pixel.y = c01; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); + pixel.x = pixel.z; + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); + pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); + pixel.y = pixel.z; + pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); + pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); + pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); + pixel.w = Short4(0xFFFFu); break; case FORMAT_A8G8R8B8Q: UNIMPLEMENTED(); - // pixel.b = UnpackLow(As<Byte8>(pixel.b), *Pointer<Byte8>(cBuffer + 8 * x + 0)); - // pixel.r = UnpackHigh(As<Byte8>(pixel.r), *Pointer<Byte8>(cBuffer + 8 * x + 0)); - // pixel.g = UnpackLow(As<Byte8>(pixel.g), *Pointer<Byte8>(cBuffer + 8 * x + 8)); - // pixel.a = UnpackHigh(As<Byte8>(pixel.a), *Pointer<Byte8>(cBuffer + 8 * x + 8)); + // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); + // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); + // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); + // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8)); break; case FORMAT_X8G8R8B8Q: UNIMPLEMENTED(); - // pixel.b = UnpackLow(As<Byte8>(pixel.b), *Pointer<Byte8>(cBuffer + 8 * x + 0)); - // pixel.r = UnpackHigh(As<Byte8>(pixel.r), *Pointer<Byte8>(cBuffer + 8 * x + 0)); - // pixel.g = UnpackLow(As<Byte8>(pixel.g), *Pointer<Byte8>(cBuffer + 8 * x + 8)); - // pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); + // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); + // pixel.y = UnpackLow(As<Byte8>(pixel.y), 
*Pointer<Byte8>(cBuffer + 8 * x + 8)); + // pixel.w = Short4(0xFFFFu); break; case FORMAT_A16B16G16R16: buffer = cBuffer; - pixel.r = *Pointer<Short4>(buffer + 8 * x); - pixel.g = *Pointer<Short4>(buffer + 8 * x + 8); + pixel.x = *Pointer<Short4>(buffer + 8 * x); + pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); - pixel.b = *Pointer<Short4>(buffer + 8 * x); - pixel.a = *Pointer<Short4>(buffer + 8 * x + 8); - transpose4x4(pixel.r, pixel.g, pixel.b, pixel.a); + pixel.z = *Pointer<Short4>(buffer + 8 * x); + pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); + transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); break; case FORMAT_G16R16: buffer = cBuffer; - pixel.r = *Pointer<Short4>(buffer + 4 * x); + pixel.x = *Pointer<Short4>(buffer + 4 * x); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); - pixel.g = *Pointer<Short4>(buffer + 4 * x); - pixel.b = pixel.r; - pixel.r = As<Short4>(UnpackLow(pixel.r, pixel.g)); - pixel.b = As<Short4>(UnpackHigh(pixel.b, pixel.g)); - pixel.g = pixel.b; - pixel.r = As<Short4>(UnpackLow(pixel.r, pixel.b)); - pixel.g = As<Short4>(UnpackHigh(pixel.g, pixel.b)); - pixel.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); - pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + pixel.y = *Pointer<Short4>(buffer + 4 * x); + pixel.z = pixel.x; + pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); + pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); + pixel.y = pixel.z; + pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); + pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); + pixel.z = Short4(0xFFFFu); + pixel.w = Short4(0xFFFFu); break; default: ASSERT(false); @@ -2483,65 +2515,65 @@ } // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor - Color4i sourceFactor; - Color4i destFactor; + Vector4i sourceFactor; + Vector4i destFactor; blendFactor(r, sourceFactor, current, pixel, (Context::BlendFactor)state.sourceBlendFactor); 
blendFactor(r, destFactor, current, pixel, (Context::BlendFactor)state.destBlendFactor); if(state.sourceBlendFactor != Context::BLEND_ONE && state.sourceBlendFactor != Context::BLEND_ZERO) { - current.r = MulHigh(As<UShort4>(current.r), As<UShort4>(sourceFactor.r)); - current.g = MulHigh(As<UShort4>(current.g), As<UShort4>(sourceFactor.g)); - current.b = MulHigh(As<UShort4>(current.b), As<UShort4>(sourceFactor.b)); + current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x)); + current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y)); + current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z)); } if(state.destBlendFactor != Context::BLEND_ONE && state.destBlendFactor != Context::BLEND_ZERO) { - pixel.r = MulHigh(As<UShort4>(pixel.r), As<UShort4>(destFactor.r)); - pixel.g = MulHigh(As<UShort4>(pixel.g), As<UShort4>(destFactor.g)); - pixel.b = MulHigh(As<UShort4>(pixel.b), As<UShort4>(destFactor.b)); + pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x)); + pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y)); + pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z)); } switch(state.blendOperation) { case Context::BLENDOP_ADD: - current.r = AddSat(As<UShort4>(current.r), As<UShort4>(pixel.r)); - current.g = AddSat(As<UShort4>(current.g), As<UShort4>(pixel.g)); - current.b = AddSat(As<UShort4>(current.b), As<UShort4>(pixel.b)); + current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); break; case Context::BLENDOP_SUB: - current.r = SubSat(As<UShort4>(current.r), As<UShort4>(pixel.r)); - current.g = SubSat(As<UShort4>(current.g), As<UShort4>(pixel.g)); - current.b = SubSat(As<UShort4>(current.b), As<UShort4>(pixel.b)); + current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = SubSat(As<UShort4>(current.y), 
As<UShort4>(pixel.y)); + current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); break; case Context::BLENDOP_INVSUB: - current.r = SubSat(As<UShort4>(pixel.r), As<UShort4>(current.r)); - current.g = SubSat(As<UShort4>(pixel.g), As<UShort4>(current.g)); - current.b = SubSat(As<UShort4>(pixel.b), As<UShort4>(current.b)); + current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); + current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); + current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); break; case Context::BLENDOP_MIN: - current.r = Min(As<UShort4>(current.r), As<UShort4>(pixel.r)); - current.g = Min(As<UShort4>(current.g), As<UShort4>(pixel.g)); - current.b = Min(As<UShort4>(current.b), As<UShort4>(pixel.b)); + current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); break; case Context::BLENDOP_MAX: - current.r = Max(As<UShort4>(current.r), As<UShort4>(pixel.r)); - current.g = Max(As<UShort4>(current.g), As<UShort4>(pixel.g)); - current.b = Max(As<UShort4>(current.b), As<UShort4>(pixel.b)); + current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); + current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); + current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); break; case Context::BLENDOP_SOURCE: // No operation break; case Context::BLENDOP_DEST: - current.r = pixel.r; - current.g = pixel.g; - current.b = pixel.b; + current.x = pixel.x; + current.y = pixel.y; + current.z = pixel.z; break; case Context::BLENDOP_NULL: - current.r = Short4(0x0000, 0x0000, 0x0000, 0x0000); - current.g = Short4(0x0000, 0x0000, 0x0000, 0x0000); - current.b = Short4(0x0000, 0x0000, 0x0000, 0x0000); + current.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); + current.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); + current.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); break; default: ASSERT(false); 
@@ -2552,46 +2584,46 @@ if(state.sourceBlendFactorAlpha != Context::BLEND_ONE && state.sourceBlendFactorAlpha != Context::BLEND_ZERO) { - current.a = MulHigh(As<UShort4>(current.a), As<UShort4>(sourceFactor.a)); + current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w)); } if(state.destBlendFactorAlpha != Context::BLEND_ONE && state.destBlendFactorAlpha != Context::BLEND_ZERO) { - pixel.a = MulHigh(As<UShort4>(pixel.a), As<UShort4>(destFactor.a)); + pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w)); } switch(state.blendOperationAlpha) { case Context::BLENDOP_ADD: - current.a = AddSat(As<UShort4>(current.a), As<UShort4>(pixel.a)); + current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); break; case Context::BLENDOP_SUB: - current.a = SubSat(As<UShort4>(current.a), As<UShort4>(pixel.a)); + current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); break; case Context::BLENDOP_INVSUB: - current.a = SubSat(As<UShort4>(pixel.a), As<UShort4>(current.a)); + current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); break; case Context::BLENDOP_MIN: - current.a = Min(As<UShort4>(current.a), As<UShort4>(pixel.a)); + current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); break; case Context::BLENDOP_MAX: - current.a = Max(As<UShort4>(current.a), As<UShort4>(pixel.a)); + current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); break; case Context::BLENDOP_SOURCE: // No operation break; case Context::BLENDOP_DEST: - current.a = pixel.a; + current.w = pixel.w; break; case Context::BLENDOP_NULL: - current.a = Short4(0x0000, 0x0000, 0x0000, 0x0000); + current.w = Short4(0x0000, 0x0000, 0x0000, 0x0000); break; default: ASSERT(false); } } - void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Color4i &current, Int &sMask, Int &zMask, Int &cMask) + void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4i &current, Int &sMask, Int &zMask, Int &cMask) {
if(!state.colorWriteActive(index)) { @@ -2612,10 +2644,10 @@ case FORMAT_X8R8G8B8: case FORMAT_A8R8G8B8: { - current.r = current.r - As<Short4>(As<UShort4>(current.r) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - current.g = current.g - As<Short4>(As<UShort4>(current.g) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - current.b = current.b - As<Short4>(As<UShort4>(current.b) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - current.a = current.a - As<Short4>(As<UShort4>(current.a) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); + current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); + current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); + current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); + current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); } break; } @@ -2629,78 +2661,78 @@ { case FORMAT_X8G8R8B8Q: UNIMPLEMENTED(); - // current.r = As<Short4>(As<UShort4>(current.r) >> 8); - // current.g = As<Short4>(As<UShort4>(current.g) >> 8); - // current.b = As<Short4>(As<UShort4>(current.b) >> 8); + // current.x = As<Short4>(As<UShort4>(current.x) >> 8); + // current.y = As<Short4>(As<UShort4>(current.y) >> 8); + // current.z = As<Short4>(As<UShort4>(current.z) >> 8); - // current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r))); - // current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.g))); + // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); + // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); break; case FORMAT_A8G8R8B8Q: UNIMPLEMENTED(); - // current.r = As<Short4>(As<UShort4>(current.r) >> 8); - // current.g = As<Short4>(As<UShort4>(current.g) >> 8); - // current.b = As<Short4>(As<UShort4>(current.b) >> 8); - // current.a = As<Short4>(As<UShort4>(current.a) 
>> 8); + // current.x = As<Short4>(As<UShort4>(current.x) >> 8); + // current.y = As<Short4>(As<UShort4>(current.y) >> 8); + // current.z = As<Short4>(As<UShort4>(current.z) >> 8); + // current.w = As<Short4>(As<UShort4>(current.w) >> 8); - // current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r))); - // current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.a))); + // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); + // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); break; case FORMAT_X8R8G8B8: case FORMAT_A8R8G8B8: if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7) { - current.r = As<Short4>(As<UShort4>(current.r) >> 8); - current.g = As<Short4>(As<UShort4>(current.g) >> 8); - current.b = As<Short4>(As<UShort4>(current.b) >> 8); + current.x = As<Short4>(As<UShort4>(current.x) >> 8); + current.y = As<Short4>(As<UShort4>(current.y) >> 8); + current.z = As<Short4>(As<UShort4>(current.z) >> 8); - current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r))); - current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.g))); + current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); + current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); - current.r = current.b; - current.b = UnpackLow(As<Byte8>(current.b), As<Byte8>(current.g)); - current.r = UnpackHigh(As<Byte8>(current.r), As<Byte8>(current.g)); - current.g = current.b; - current.b = As<Short4>(UnpackLow(current.b, current.r)); - current.g = As<Short4>(UnpackHigh(current.g, current.r)); + current.x = current.z; + current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); + current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.y = current.z; + current.z = As<Short4>(UnpackLow(current.z, current.x)); + current.y = As<Short4>(UnpackHigh(current.y, current.x)); } else { - current.r = 
As<Short4>(As<UShort4>(current.r) >> 8); - current.g = As<Short4>(As<UShort4>(current.g) >> 8); - current.b = As<Short4>(As<UShort4>(current.b) >> 8); - current.a = As<Short4>(As<UShort4>(current.a) >> 8); + current.x = As<Short4>(As<UShort4>(current.x) >> 8); + current.y = As<Short4>(As<UShort4>(current.y) >> 8); + current.z = As<Short4>(As<UShort4>(current.z) >> 8); + current.w = As<Short4>(As<UShort4>(current.w) >> 8); - current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r))); - current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.a))); + current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); + current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); - current.r = current.b; - current.b = UnpackLow(As<Byte8>(current.b), As<Byte8>(current.g)); - current.r = UnpackHigh(As<Byte8>(current.r), As<Byte8>(current.g)); - current.g = current.b; - current.b = As<Short4>(UnpackLow(current.b, current.r)); - current.g = As<Short4>(UnpackHigh(current.g, current.r)); + current.x = current.z; + current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); + current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); + current.y = current.z; + current.z = As<Short4>(UnpackLow(current.z, current.x)); + current.y = As<Short4>(UnpackHigh(current.y, current.x)); } break; case FORMAT_G16R16: - current.b = current.r; - current.r = As<Short4>(UnpackLow(current.r, current.g)); - current.b = As<Short4>(UnpackHigh(current.b, current.g)); - current.g = current.b; + current.z = current.x; + current.x = As<Short4>(UnpackLow(current.x, current.y)); + current.z = As<Short4>(UnpackHigh(current.z, current.y)); + current.y = current.z; break; case FORMAT_A16B16G16R16: - transpose4x4(current.r, current.g, current.b, current.a); + transpose4x4(current.x, current.y, current.z, current.w); break; case FORMAT_R32F: case FORMAT_G32R32F: case FORMAT_A32B32G32R32F: { - Color4f oC; + Vector4f oC; - oC.r = 
convertUnsigned16(UShort4(current.r)); - oC.g = convertUnsigned16(UShort4(current.g)); - oC.b = convertUnsigned16(UShort4(current.b)); - oC.a = convertUnsigned16(UShort4(current.a)); + oC.x = convertUnsigned16(UShort4(current.x)); + oC.y = convertUnsigned16(UShort4(current.y)); + oC.z = convertUnsigned16(UShort4(current.z)); + oC.w = convertUnsigned16(UShort4(current.w)); writeColor(r, index, cBuffer, x, oC, sMask, zMask, cMask); } @@ -2709,8 +2741,8 @@ ASSERT(false); } - Short4 c01 = current.b; - Short4 c23 = current.g; + Short4 c01 = current.z; + Short4 c23 = current.y; Int xMask; // Combination of all masks @@ -2816,15 +2848,15 @@ if((rgbaWriteMask & 0x00000003) != 0x00000003) { Short4 masked = value; - current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); + current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0])); - current.r |= masked; + current.x |= masked; } - current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8); + current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8); value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); - current.r |= value; - *Pointer<Short4>(buffer) = current.r; + current.x |= value; + *Pointer<Short4>(buffer) = current.x; buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); @@ -2833,15 +2865,15 @@ if((rgbaWriteMask & 0x00000003) != 0x00000003) { Short4 masked = value; - current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); + current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0])); - current.g |= masked; + current.y |= masked; } - current.g &= *Pointer<Short4>(r.constants + 
OFFSET(Constants,maskD23Q) + xMask * 8); + current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8); value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); - current.g |= value; - *Pointer<Short4>(buffer) = current.g; + current.y |= value; + *Pointer<Short4>(buffer) = current.y; break; case FORMAT_A16B16G16R16: buffer = cBuffer + 8 * x; @@ -2852,15 +2884,15 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); - current.r |= masked; + current.x |= masked; } - current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8); + current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8); value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); - current.r |= value; - *Pointer<Short4>(buffer) = current.r; + current.x |= value; + *Pointer<Short4>(buffer) = current.x; } { @@ -2869,15 +2901,15 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); - current.g |= masked; + current.y |= masked; } - current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8); + current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8); value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); - current.g |= value; - *Pointer<Short4>(buffer + 8) = current.g; + current.y |= value; + *Pointer<Short4>(buffer + 8) = current.y; } buffer += 
*Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); @@ -2888,15 +2920,15 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.b &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); - current.b |= masked; + current.z |= masked; } - current.b &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8); + current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8); value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); - current.b |= value; - *Pointer<Short4>(buffer) = current.b; + current.z |= value; + *Pointer<Short4>(buffer) = current.z; } { @@ -2905,15 +2937,15 @@ if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.a &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); - current.a |= masked; + current.w |= masked; } - current.a &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ3Q) + xMask * 8); + current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ3Q) + xMask * 8); value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); - current.a |= value; - *Pointer<Short4>(buffer + 8) = current.a; + current.w |= value; + *Pointer<Short4>(buffer + 8) = current.w; } break; default: @@ -2921,7 +2953,7 @@ } } - void PixelRoutine::blendFactor(Registers &r, const Color4f &blendFactor, const Color4f &oC, const Color4f &pixel, Context::BlendFactor blendFactorActive) + void PixelRoutine::blendFactor(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, Context::BlendFactor blendFactorActive) { 
switch(blendFactorActive) { @@ -2932,67 +2964,67 @@ // Optimized break; case Context::BLEND_SOURCE: - blendFactor.r = oC.r; - blendFactor.g = oC.g; - blendFactor.b = oC.b; + blendFactor.x = oC.x; + blendFactor.y = oC.y; + blendFactor.z = oC.z; break; case Context::BLEND_INVSOURCE: - blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.r; - blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.g; - blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.b; + blendFactor.x = Float4(1.0f) - oC.x; + blendFactor.y = Float4(1.0f) - oC.y; + blendFactor.z = Float4(1.0f) - oC.z; break; case Context::BLEND_DEST: - blendFactor.r = pixel.r; - blendFactor.g = pixel.g; - blendFactor.b = pixel.b; + blendFactor.x = pixel.x; + blendFactor.y = pixel.y; + blendFactor.z = pixel.z; break; case Context::BLEND_INVDEST: - blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.r; - blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.g; - blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.b; + blendFactor.x = Float4(1.0f) - pixel.x; + blendFactor.y = Float4(1.0f) - pixel.y; + blendFactor.z = Float4(1.0f) - pixel.z; break; case Context::BLEND_SOURCEALPHA: - blendFactor.r = oC.a; - blendFactor.g = oC.a; - blendFactor.b = oC.a; + blendFactor.x = oC.w; + blendFactor.y = oC.w; + blendFactor.z = oC.w; break; case Context::BLEND_INVSOURCEALPHA: - blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a; - blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a; - blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a; + blendFactor.x = Float4(1.0f) - oC.w; + blendFactor.y = Float4(1.0f) - oC.w; + blendFactor.z = Float4(1.0f) - oC.w; break; case Context::BLEND_DESTALPHA: - blendFactor.r = pixel.a; - blendFactor.g = pixel.a; - blendFactor.b = pixel.a; + blendFactor.x = pixel.w; + blendFactor.y = pixel.w; + blendFactor.z = pixel.w; break; case Context::BLEND_INVDESTALPHA: - blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a; - blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a; - blendFactor.b 
= Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a; + blendFactor.x = Float4(1.0f) - pixel.w; + blendFactor.y = Float4(1.0f) - pixel.w; + blendFactor.z = Float4(1.0f) - pixel.w; break; case Context::BLEND_SRCALPHASAT: - blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a; - blendFactor.r = Min(blendFactor.r, oC.a); - blendFactor.g = blendFactor.r; - blendFactor.b = blendFactor.r; + blendFactor.x = Float4(1.0f) - pixel.w; + blendFactor.x = Min(blendFactor.x, oC.w); + blendFactor.y = blendFactor.x; + blendFactor.z = blendFactor.x; break; case Context::BLEND_CONSTANT: - blendFactor.r = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[0])); - blendFactor.g = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[1])); - blendFactor.b = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[2])); + blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[0])); + blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[1])); + blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[2])); break; case Context::BLEND_INVCONSTANT: - blendFactor.r = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0])); - blendFactor.g = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1])); - blendFactor.b = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2])); + blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0])); + blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1])); + blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2])); break; default: ASSERT(false); } } - void PixelRoutine::blendFactorAlpha(Registers &r, const Color4f &blendFactor, const Color4f &oC, const Color4f &pixel, Context::BlendFactor blendFactorAlphaActive) + void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, 
const Vector4f &pixel, Context::BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) { @@ -3003,44 +3035,44 @@ // Optimized break; case Context::BLEND_SOURCE: - blendFactor.a = oC.a; + blendFactor.w = oC.w; break; case Context::BLEND_INVSOURCE: - blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a; + blendFactor.w = Float4(1.0f) - oC.w; break; case Context::BLEND_DEST: - blendFactor.a = pixel.a; + blendFactor.w = pixel.w; break; case Context::BLEND_INVDEST: - blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a; + blendFactor.w = Float4(1.0f) - pixel.w; break; case Context::BLEND_SOURCEALPHA: - blendFactor.a = oC.a; + blendFactor.w = oC.w; break; case Context::BLEND_INVSOURCEALPHA: - blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a; + blendFactor.w = Float4(1.0f) - oC.w; break; case Context::BLEND_DESTALPHA: - blendFactor.a = pixel.a; + blendFactor.w = pixel.w; break; case Context::BLEND_INVDESTALPHA: - blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a; + blendFactor.w = Float4(1.0f) - pixel.w; break; case Context::BLEND_SRCALPHASAT: - blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + blendFactor.w = Float4(1.0f); break; case Context::BLEND_CONSTANT: - blendFactor.a = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[3])); + blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[3])); break; case Context::BLEND_INVCONSTANT: - blendFactor.a = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3])); + blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3])); break; default: ASSERT(false); } } - void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Color4f &oC, Int &x) + void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x) { if(!state.alphaBlendActive) { @@ -3048,9 +3080,9 @@ } Pointer<Byte> buffer; - Color4f pixel; + Vector4f pixel; - Color4i color; + Vector4i color; Short4 
c01; Short4 c23; @@ -3062,126 +3094,126 @@ c01 = *Pointer<Short4>(buffer); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); c23 = *Pointer<Short4>(buffer); - color.b = c01; - color.g = c01; - color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(c23)); - color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(c23)); - color.r = color.b; - color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.g)); - color.r = UnpackHigh(As<Byte8>(color.r), As<Byte8>(color.g)); - color.g = color.b; - color.a = color.r; - color.r = UnpackLow(As<Byte8>(color.r), As<Byte8>(color.r)); - color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(color.g)); - color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.b)); - color.a = UnpackHigh(As<Byte8>(color.a), As<Byte8>(color.a)); + color.z = c01; + color.y = c01; + color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(c23)); + color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(c23)); + color.x = color.z; + color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.y)); + color.x = UnpackHigh(As<Byte8>(color.x), As<Byte8>(color.y)); + color.y = color.z; + color.w = color.x; + color.x = UnpackLow(As<Byte8>(color.x), As<Byte8>(color.x)); + color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(color.y)); + color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.z)); + color.w = UnpackHigh(As<Byte8>(color.w), As<Byte8>(color.w)); - pixel.r = convertUnsigned16(As<UShort4>(color.r)); - pixel.g = convertUnsigned16(As<UShort4>(color.g)); - pixel.b = convertUnsigned16(As<UShort4>(color.b)); - pixel.a = convertUnsigned16(As<UShort4>(color.a)); + pixel.x = convertUnsigned16(As<UShort4>(color.x)); + pixel.y = convertUnsigned16(As<UShort4>(color.y)); + pixel.z = convertUnsigned16(As<UShort4>(color.z)); + pixel.w = convertUnsigned16(As<UShort4>(color.w)); break; case FORMAT_X8R8G8B8: buffer = cBuffer + 4 * x; c01 = *Pointer<Short4>(buffer); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); c23 = *Pointer<Short4>(buffer); - color.b = 
c01; - color.g = c01; - color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(c23)); - color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(c23)); - color.r = color.b; - color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.g)); - color.r = UnpackHigh(As<Byte8>(color.r), As<Byte8>(color.g)); - color.g = color.b; - color.r = UnpackLow(As<Byte8>(color.r), As<Byte8>(color.r)); - color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(color.g)); - color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.b)); + color.z = c01; + color.y = c01; + color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(c23)); + color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(c23)); + color.x = color.z; + color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.y)); + color.x = UnpackHigh(As<Byte8>(color.x), As<Byte8>(color.y)); + color.y = color.z; + color.x = UnpackLow(As<Byte8>(color.x), As<Byte8>(color.x)); + color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(color.y)); + color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.z)); - pixel.r = convertUnsigned16(As<UShort4>(color.r)); - pixel.g = convertUnsigned16(As<UShort4>(color.g)); - pixel.b = convertUnsigned16(As<UShort4>(color.b)); - pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + pixel.x = convertUnsigned16(As<UShort4>(color.x)); + pixel.y = convertUnsigned16(As<UShort4>(color.y)); + pixel.z = convertUnsigned16(As<UShort4>(color.z)); + pixel.w = Float4(1.0f); break; case FORMAT_A8G8R8B8Q: UNIMPLEMENTED(); - // UnpackLow(pixel.b, qword_ptr [cBuffer+8*x+0]); - // UnpackHigh(pixel.r, qword_ptr [cBuffer+8*x+0]); - // UnpackLow(pixel.g, qword_ptr [cBuffer+8*x+8]); - // UnpackHigh(pixel.a, qword_ptr [cBuffer+8*x+8]); + // UnpackLow(pixel.z, qword_ptr [cBuffer+8*x+0]); + // UnpackHigh(pixel.x, qword_ptr [cBuffer+8*x+0]); + // UnpackLow(pixel.y, qword_ptr [cBuffer+8*x+8]); + // UnpackHigh(pixel.w, qword_ptr [cBuffer+8*x+8]); break; case FORMAT_X8G8R8B8Q: UNIMPLEMENTED(); - // UnpackLow(pixel.b, qword_ptr [cBuffer+8*x+0]); - // UnpackHigh(pixel.r, 
qword_ptr [cBuffer+8*x+0]); - // UnpackLow(pixel.g, qword_ptr [cBuffer+8*x+8]); - // pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + // UnpackLow(pixel.z, qword_ptr [cBuffer+8*x+0]); + // UnpackHigh(pixel.x, qword_ptr [cBuffer+8*x+0]); + // UnpackLow(pixel.y, qword_ptr [cBuffer+8*x+8]); + // pixel.w = Short4(0xFFFFu); break; case FORMAT_A16B16G16R16: buffer = cBuffer; - color.r = *Pointer<Short4>(buffer + 8 * x); - color.g = *Pointer<Short4>(buffer + 8 * x + 8); + color.x = *Pointer<Short4>(buffer + 8 * x); + color.y = *Pointer<Short4>(buffer + 8 * x + 8); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); - color.b = *Pointer<Short4>(buffer + 8 * x); - color.a = *Pointer<Short4>(buffer + 8 * x + 8); + color.z = *Pointer<Short4>(buffer + 8 * x); + color.w = *Pointer<Short4>(buffer + 8 * x + 8); - transpose4x4(color.r, color.g, color.b, color.a); + transpose4x4(color.x, color.y, color.z, color.w); - pixel.r = convertUnsigned16(As<UShort4>(color.r)); - pixel.g = convertUnsigned16(As<UShort4>(color.g)); - pixel.b = convertUnsigned16(As<UShort4>(color.b)); - pixel.a = convertUnsigned16(As<UShort4>(color.a)); + pixel.x = convertUnsigned16(As<UShort4>(color.x)); + pixel.y = convertUnsigned16(As<UShort4>(color.y)); + pixel.z = convertUnsigned16(As<UShort4>(color.z)); + pixel.w = convertUnsigned16(As<UShort4>(color.w)); break; case FORMAT_G16R16: buffer = cBuffer; - color.r = *Pointer<Short4>(buffer + 4 * x); + color.x = *Pointer<Short4>(buffer + 4 * x); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); - color.g = *Pointer<Short4>(buffer + 4 * x); - color.b = color.r; - color.r = As<Short4>(UnpackLow(color.r, color.g)); - color.b = As<Short4>(UnpackHigh(color.b, color.g)); - color.g = color.b; - color.r = As<Short4>(UnpackLow(color.r, color.b)); - color.g = As<Short4>(UnpackHigh(color.g, color.b)); + color.y = *Pointer<Short4>(buffer + 4 * x); + color.z = color.x; + color.x = As<Short4>(UnpackLow(color.x, color.y)); + color.z = 
As<Short4>(UnpackHigh(color.z, color.y)); + color.y = color.z; + color.x = As<Short4>(UnpackLow(color.x, color.z)); + color.y = As<Short4>(UnpackHigh(color.y, color.z)); - pixel.r = convertUnsigned16(As<UShort4>(color.r)); - pixel.g = convertUnsigned16(As<UShort4>(color.g)); - pixel.b = Float4(1.0f, 1.0f, 1.0f, 1.0f); - pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + pixel.x = convertUnsigned16(As<UShort4>(color.x)); + pixel.y = convertUnsigned16(As<UShort4>(color.y)); + pixel.z = Float4(1.0f); + pixel.w = Float4(1.0f); break; case FORMAT_R32F: buffer = cBuffer; // FIXME: movlps - pixel.r.x = *Pointer<Float>(buffer + 4 * x + 0); - pixel.r.y = *Pointer<Float>(buffer + 4 * x + 4); + pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); + pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); // FIXME: movhps - pixel.r.z = *Pointer<Float>(buffer + 4 * x + 0); - pixel.r.w = *Pointer<Float>(buffer + 4 * x + 4); - pixel.g = Float4(1.0f, 1.0f, 1.0f, 1.0f); - pixel.b = Float4(1.0f, 1.0f, 1.0f, 1.0f); - pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); + pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); + pixel.y = Float4(1.0f); + pixel.z = Float4(1.0f); + pixel.w = Float4(1.0f); break; case FORMAT_G32R32F: buffer = cBuffer; - pixel.r = *Pointer<Float4>(buffer + 8 * x, 16); + pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); - pixel.g = *Pointer<Float4>(buffer + 8 * x, 16); - pixel.b = pixel.r; - pixel.r = ShuffleLowHigh(pixel.r, pixel.g, 0x88); - pixel.b = ShuffleLowHigh(pixel.b, pixel.g, 0xDD); - pixel.g = pixel.b; - pixel.b = Float4(1.0f, 1.0f, 1.0f, 1.0f); - pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); + pixel.z = pixel.x; + pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88); + pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD); + pixel.y = pixel.z; + pixel.z = 
Float4(1.0f); + pixel.w = Float4(1.0f); break; case FORMAT_A32B32G32R32F: buffer = cBuffer; - pixel.r = *Pointer<Float4>(buffer + 16 * x, 16); - pixel.g = *Pointer<Float4>(buffer + 16 * x + 16, 16); + pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); + pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); - pixel.b = *Pointer<Float4>(buffer + 16 * x, 16); - pixel.a = *Pointer<Float4>(buffer + 16 * x + 16, 16); - transpose4x4(pixel.r, pixel.g, pixel.b, pixel.a); + pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); + pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); + transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); break; default: ASSERT(false); @@ -3189,71 +3221,71 @@ if(postBlendSRGB && state.writeSRGB) { - sRGBtoLinear(pixel.r); - sRGBtoLinear(pixel.g); - sRGBtoLinear(pixel.b); + sRGBtoLinear(pixel.x); + sRGBtoLinear(pixel.y); + sRGBtoLinear(pixel.z); } // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor - Color4f sourceFactor; - Color4f destFactor; + Vector4f sourceFactor; + Vector4f destFactor; blendFactor(r, sourceFactor, oC, pixel, (Context::BlendFactor)state.sourceBlendFactor); blendFactor(r, destFactor, oC, pixel, (Context::BlendFactor)state.destBlendFactor); if(state.sourceBlendFactor != Context::BLEND_ONE && state.sourceBlendFactor != Context::BLEND_ZERO) { - oC.r *= sourceFactor.r; - oC.g *= sourceFactor.g; - oC.b *= sourceFactor.b; + oC.x *= sourceFactor.x; + oC.y *= sourceFactor.y; + oC.z *= sourceFactor.z; } if(state.destBlendFactor != Context::BLEND_ONE && state.destBlendFactor != Context::BLEND_ZERO) { - pixel.r *= destFactor.r; - pixel.g *= destFactor.g; - pixel.b *= destFactor.b; + pixel.x *= destFactor.x; + pixel.y *= destFactor.y; + pixel.z *= destFactor.z; } switch(state.blendOperation) { case Context::BLENDOP_ADD: - oC.r += pixel.r; - oC.g += pixel.g; - oC.b += pixel.b; + oC.x += pixel.x; + oC.y += pixel.y; + oC.z += pixel.z; break; case 
Context::BLENDOP_SUB: - oC.r -= pixel.r; - oC.g -= pixel.g; - oC.b -= pixel.b; + oC.x -= pixel.x; + oC.y -= pixel.y; + oC.z -= pixel.z; break; case Context::BLENDOP_INVSUB: - oC.r = pixel.r - oC.r; - oC.g = pixel.g - oC.g; - oC.b = pixel.b - oC.b; + oC.x = pixel.x - oC.x; + oC.y = pixel.y - oC.y; + oC.z = pixel.z - oC.z; break; case Context::BLENDOP_MIN: - oC.r = Min(oC.r, pixel.r); - oC.g = Min(oC.g, pixel.g); - oC.b = Min(oC.b, pixel.b); + oC.x = Min(oC.x, pixel.x); + oC.y = Min(oC.y, pixel.y); + oC.z = Min(oC.z, pixel.z); break; case Context::BLENDOP_MAX: - oC.r = Max(oC.r, pixel.r); - oC.g = Max(oC.g, pixel.g); - oC.b = Max(oC.b, pixel.b); + oC.x = Max(oC.x, pixel.x); + oC.y = Max(oC.y, pixel.y); + oC.z = Max(oC.z, pixel.z); break; case Context::BLENDOP_SOURCE: // No operation break; case Context::BLENDOP_DEST: - oC.r = pixel.r; - oC.g = pixel.g; - oC.b = pixel.b; + oC.x = pixel.x; + oC.y = pixel.y; + oC.z = pixel.z; break; case Context::BLENDOP_NULL: - oC.r = Float4(0.0f, 0.0f, 0.0f, 0.0f); - oC.g = Float4(0.0f, 0.0f, 0.0f, 0.0f); - oC.b = Float4(0.0f, 0.0f, 0.0f, 0.0f); + oC.x = Float4(0.0f); + oC.y = Float4(0.0f); + oC.z = Float4(0.0f); break; default: ASSERT(false); @@ -3264,54 +3296,54 @@ if(state.sourceBlendFactorAlpha != Context::BLEND_ONE && state.sourceBlendFactorAlpha != Context::BLEND_ZERO) { - oC.a *= sourceFactor.a; + oC.w *= sourceFactor.w; } if(state.destBlendFactorAlpha != Context::BLEND_ONE && state.destBlendFactorAlpha != Context::BLEND_ZERO) { - pixel.a *= destFactor.a; + pixel.w *= destFactor.w; } switch(state.blendOperationAlpha) { case Context::BLENDOP_ADD: - oC.a += pixel.a; + oC.w += pixel.w; break; case Context::BLENDOP_SUB: - oC.a -= pixel.a; + oC.w -= pixel.w; break; case Context::BLENDOP_INVSUB: - pixel.a -= oC.a; - oC.a = pixel.a; + pixel.w -= oC.w; + oC.w = pixel.w; break; case Context::BLENDOP_MIN: - oC.a = Min(oC.a, pixel.a); + oC.w = Min(oC.w, pixel.w); break; case Context::BLENDOP_MAX: - oC.a = Max(oC.a, pixel.a); + oC.w = 
Max(oC.w, pixel.w); break; case Context::BLENDOP_SOURCE: // No operation break; case Context::BLENDOP_DEST: - oC.a = pixel.a; + oC.w = pixel.w; break; case Context::BLENDOP_NULL: - oC.a = Float4(0.0f, 0.0f, 0.0f, 0.0f); + oC.w = Float4(0.0f); break; default: ASSERT(false); } } - void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Color4f &oC, Int &sMask, Int &zMask, Int &cMask) + void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) { if(!state.colorWriteActive(index)) { return; } - Color4i color; + Vector4i color; switch(state.targetFormat[index]) { @@ -3325,13 +3357,13 @@ case FORMAT_R32F: break; case FORMAT_G32R32F: - oC.b = oC.r; - oC.r = UnpackLow(oC.r, oC.g); - oC.b = UnpackHigh(oC.b, oC.g); - oC.g = oC.b; + oC.z = oC.x; + oC.x = UnpackLow(oC.x, oC.y); + oC.z = UnpackHigh(oC.z, oC.y); + oC.y = oC.z; break; case FORMAT_A32B32G32R32F: - transpose4x4(oC.r, oC.g, oC.b, oC.a); + transpose4x4(oC.x, oC.y, oC.z, oC.w); break; default: ASSERT(false); @@ -3375,19 +3407,19 @@ value.z = *Pointer<Float>(buffer + 0); value.w = *Pointer<Float>(buffer + 4); - oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); - oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(value)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); // FIXME: movhps - *Pointer<Float>(buffer + 0) = oC.r.z; - *Pointer<Float>(buffer + 4) = oC.r.w; + *Pointer<Float>(buffer + 0) = oC.x.z; + *Pointer<Float>(buffer + 4) = oC.x.w; buffer -= *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); // FIXME: movlps - *Pointer<Float>(buffer + 0) = oC.r.x; - *Pointer<Float>(buffer + 4) = oC.r.y; + 
*Pointer<Float>(buffer + 0) = oC.x.x; + *Pointer<Float>(buffer + 4) = oC.x.y; } break; case FORMAT_G32R32F: @@ -3398,15 +3430,15 @@ if((rgbaWriteMask & 0x00000003) != 0x00000003) { Float4 masked = value; - oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0]))); - oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(masked)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); } - oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); - oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(value)); - *Pointer<Float4>(buffer) = oC.r; + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); + *Pointer<Float4>(buffer) = oC.x; buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); @@ -3417,15 +3449,15 @@ Float4 masked; masked = value; - oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0]))); - oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(masked)); + oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); } - oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + 
OFFSET(Constants,maskQ23X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); - oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(value)); - *Pointer<Float4>(buffer) = oC.g; + oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); + *Pointer<Float4>(buffer) = oC.y; break; case FORMAT_A32B32G32R32F: buffer = cBuffer + 16 * x; @@ -3436,15 +3468,15 @@ if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); - oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(masked)); + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); } - oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); - oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(value)); - *Pointer<Float4>(buffer, 16) = oC.r; + oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); + *Pointer<Float4>(buffer, 16) = oC.x; } { @@ -3453,15 +3485,15 @@ if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); - oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(masked)); + oC.y = As<Float4>(As<Int4>(oC.y) | 
As<Int4>(masked)); } - oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); + oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); - oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(value)); - *Pointer<Float4>(buffer + 16, 16) = oC.g; + oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); + *Pointer<Float4>(buffer + 16, 16) = oC.y; } buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); @@ -3472,15 +3504,15 @@ if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.b = As<Float4>(As<Int4>(oC.b) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); - oC.b = As<Float4>(As<Int4>(oC.b) | As<Int4>(masked)); + oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); } - oC.b = As<Float4>(As<Int4>(oC.b) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); + oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); - oC.b = As<Float4>(As<Int4>(oC.b) | As<Int4>(value)); - *Pointer<Float4>(buffer, 16) = oC.b; + oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); + *Pointer<Float4>(buffer, 16) = oC.z; } { @@ -3489,15 +3521,15 @@ if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.a = As<Float4>(As<Int4>(oC.a) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + 
OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); - oC.a = As<Float4>(As<Int4>(oC.a) | As<Int4>(masked)); + oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); } - oC.a = As<Float4>(As<Int4>(oC.a) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); + oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); - oC.a = As<Float4>(As<Int4>(oC.a) | As<Int4>(value)); - *Pointer<Float4>(buffer + 16, 16) = oC.a; + oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); + *Pointer<Float4>(buffer + 16, 16) = oC.w; } break; default: @@ -3508,88 +3540,91 @@ void PixelRoutine::ps_1_x(Registers &r, Int cMask[4]) { int pad = 0; // Count number of texm3x3pad instructions - Color4i dPairing; // Destination for first pairing instruction + Vector4i dPairing; // Destination for first pairing instruction - for(int i = 0; i < pixelShader->getLength(); i++) + for(int i = 0; i < shader->getLength(); i++) { - const ShaderInstruction *instruction = pixelShader->getInstruction(i); - Op::Opcode opcode = instruction->getOpcode(); + const Shader::Instruction *instruction = shader->getInstruction(i); + Shader::Opcode opcode = instruction->opcode; // #ifndef NDEBUG // FIXME: Centralize debug output control - // pixelShader->printInstruction(i, "debug.txt"); + // shader->printInstruction(i, "debug.txt"); // #endif - if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB) + if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) { continue; } - const Dst &dst = instruction->getDestinationParameter(); - const Src &src0 = instruction->getSourceParameter(0); - const Src 
&src1 = instruction->getSourceParameter(1); - const Src &src2 = instruction->getSourceParameter(2); - const Src &src3 = instruction->getSourceParameter(3); + const Dst &dst = instruction->dst; + const Src &src0 = instruction->src[0]; + const Src &src1 = instruction->src[1]; + const Src &src2 = instruction->src[2]; - bool pairing = i + 1 < pixelShader->getLength() && pixelShader->getInstruction(i + 1)->isCoissue(); // First instruction of pair - bool coissue = instruction->isCoissue(); // Second instruction of pair + unsigned short version = shader->getVersion(); + bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair + bool coissue = instruction->coissue; // Second instruction of pair - Color4i d; - Color4i s0; - Color4i s1; - Color4i s2; - Color4i s3; + Vector4i d; + Vector4i s0; + Vector4i s1; + Vector4i s2; - if(src0.type != Src::PARAMETER_VOID) s0 = regi(r, src0); - if(src1.type != Src::PARAMETER_VOID) s1 = regi(r, src1); - if(src2.type != Src::PARAMETER_VOID) s2 = regi(r, src2); - if(src3.type != Src::PARAMETER_VOID) s3 = regi(r, src3); + if(src0.type != Shader::PARAMETER_VOID) s0 = regi(r, src0); + if(src1.type != Shader::PARAMETER_VOID) s1 = regi(r, src1); + if(src2.type != Shader::PARAMETER_VOID) s2 = regi(r, src2); + + Float4 u = version < 0x0104 ? r.vf[2 + dst.index].x : r.vf[2 + src0.index].x; + Float4 v = version < 0x0104 ? r.vf[2 + dst.index].y : r.vf[2 + src0.index].y; + Float4 s = version < 0x0104 ? r.vf[2 + dst.index].z : r.vf[2 + src0.index].z; + Float4 t = version < 0x0104 ? 
r.vf[2 + dst.index].w : r.vf[2 + src0.index].w; switch(opcode) { - case Op::OPCODE_PS_1_0: break; - case Op::OPCODE_PS_1_1: break; - case Op::OPCODE_PS_1_2: break; - case Op::OPCODE_PS_1_3: break; - case Op::OPCODE_PS_1_4: break; + case Shader::OPCODE_PS_1_0: break; + case Shader::OPCODE_PS_1_1: break; + case Shader::OPCODE_PS_1_2: break; + case Shader::OPCODE_PS_1_3: break; + case Shader::OPCODE_PS_1_4: break; - case Op::OPCODE_DEF: break; + case Shader::OPCODE_DEF: break; - case Op::OPCODE_NOP: break; - case Op::OPCODE_MOV: MOV(d, s0); break; - case Op::OPCODE_ADD: ADD(d, s0, s1); break; - case Op::OPCODE_SUB: SUB(d, s0, s1); break; - case Op::OPCODE_MAD: MAD(d, s0, s1, s2); break; - case Op::OPCODE_MUL: MUL(d, s0, s1); break; - case Op::OPCODE_DP3: DP3(d, s0, s1); break; - case Op::OPCODE_DP4: DP4(d, s0, s1); break; - case Op::OPCODE_LRP: LRP(d, s0, s1, s2); break; - case Op::OPCODE_TEXCOORD: - if(pixelShader->getVersion() < 0x0104) + case Shader::OPCODE_NOP: break; + case Shader::OPCODE_MOV: MOV(d, s0); break; + case Shader::OPCODE_ADD: ADD(d, s0, s1); break; + case Shader::OPCODE_SUB: SUB(d, s0, s1); break; + case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break; + case Shader::OPCODE_MUL: MUL(d, s0, s1); break; + case Shader::OPCODE_DP3: DP3(d, s0, s1); break; + case Shader::OPCODE_DP4: DP4(d, s0, s1); break; + case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break; + case Shader::OPCODE_TEXCOORD: + if(version < 0x0104) { - TEXCOORD(d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index); + TEXCOORD(d, u, v, s, dst.index); } else { if((src0.swizzle & 0x30) == 0x20) // .xyz { - TEXCRD(d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vz[2 + src0.index]), src0.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW); + TEXCRD(d, u, v, s, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == 
Shader::MODIFIER_DW); } else // .xyw { - TEXCRD(d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vw[2 + src0.index]), src0.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW); + TEXCRD(d, u, v, t, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); } } break; - case Op::OPCODE_TEXKILL: - if(pixelShader->getVersion() < 0x0104) + case Shader::OPCODE_TEXKILL: + if(version < 0x0104) { - TEXKILL(cMask, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index])); + TEXKILL(cMask, u, v, s); } - else if(pixelShader->getVersion() == 0x0104) + else if(version == 0x0104) { - if(dst.type == Dst::PARAMETER_TEXTURE) + if(dst.type == Shader::PARAMETER_TEXTURE) { - TEXKILL(cMask, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index])); + TEXKILL(cMask, u, v, s); } else { @@ -3598,92 +3633,92 @@ } else ASSERT(false); break; - case Op::OPCODE_TEX: - if(pixelShader->getVersion() < 0x0104) + case Shader::OPCODE_TEX: + if(version < 0x0104) { - TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, false); + TEX(r, d, u, v, s, dst.index, false); } - else if(pixelShader->getVersion() == 0x0104) + else if(version == 0x0104) { - if(src0.type == Src::PARAMETER_TEXTURE) + if(src0.type == Shader::PARAMETER_TEXTURE) { if((src0.swizzle & 0x30) == 0x20) // .xyz { - TEX(r, d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vz[2 + src0.index]), dst.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW); + TEX(r, d, u, v, s, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); } else // .xyw { - TEX(r, d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vw[2 + src0.index]), 
dst.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW); + TEX(r, d, u, v, t, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); } } else { - TEXLD(r, d, s0, dst.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW); + TEXLD(r, d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); } } else ASSERT(false); break; - case Op::OPCODE_TEXBEM: TEXBEM(r, d, s0, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index); break; - case Op::OPCODE_TEXBEML: TEXBEML(r, d, s0, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index); break; - case Op::OPCODE_TEXREG2AR: TEXREG2AR(r, d, s0, dst.index); break; - case Op::OPCODE_TEXREG2GB: TEXREG2GB(r, d, s0, dst.index); break; - case Op::OPCODE_TEXM3X2PAD: TEXM3X2PAD(r, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, 0, src0.modifier == Src::MODIFIER_SIGN); break; - case Op::OPCODE_TEXM3X2TEX: TEXM3X2TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0, src0.modifier == Src::MODIFIER_SIGN); break; - case Op::OPCODE_TEXM3X3PAD: TEXM3X3PAD(r, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, pad++ % 2, src0.modifier == Src::MODIFIER_SIGN); break; - case Op::OPCODE_TEXM3X3TEX: TEXM3X3TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0, src0.modifier == Src::MODIFIER_SIGN); break; - case Op::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0, s1); break; - case Op::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(r, d, Float4(r.vx[2 + 
dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0); break; - case Op::OPCODE_CND: CND(d, s0, s1, s2); break; - case Op::OPCODE_TEXREG2RGB: TEXREG2RGB(r, d, s0, dst.index); break; - case Op::OPCODE_TEXDP3TEX: TEXDP3TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0); break; - case Op::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, src0.modifier == Src::MODIFIER_SIGN); break; - case Op::OPCODE_TEXDP3: TEXDP3(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0); break; - case Op::OPCODE_TEXM3X3: TEXM3X3(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, src0.modifier == Src::MODIFIER_SIGN); break; - case Op::OPCODE_TEXDEPTH: TEXDEPTH(r); break; - case Op::OPCODE_CMP: CMP(d, s0, s1, s2); break; - case Op::OPCODE_BEM: BEM(r, d, s0, s1, dst.index); break; - case Op::OPCODE_PHASE: break; - case Op::OPCODE_END: break; + case Shader::OPCODE_TEXBEM: TEXBEM(r, d, s0, u, v, s, dst.index); break; + case Shader::OPCODE_TEXBEML: TEXBEML(r, d, s0, u, v, s, dst.index); break; + case Shader::OPCODE_TEXREG2AR: TEXREG2AR(r, d, s0, dst.index); break; + case Shader::OPCODE_TEXREG2GB: TEXREG2GB(r, d, s0, dst.index); break; + case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(r, u, v, s, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break; + case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break; + case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(r, u, v, s, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break; + case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break; + case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(r, d, u, v, s, dst.index, s0, s1); break; + case Shader::OPCODE_TEXM3X3VSPEC: 
TEXM3X3VSPEC(r, d, u, v, s, dst.index, s0); break; + case Shader::OPCODE_CND: CND(d, s0, s1, s2); break; + case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(r, d, s0, dst.index); break; + case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(r, d, u, v, s, dst.index, s0); break; + case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN); break; + case Shader::OPCODE_TEXDP3: TEXDP3(r, d, u, v, s, s0); break; + case Shader::OPCODE_TEXM3X3: TEXM3X3(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN); break; + case Shader::OPCODE_TEXDEPTH: TEXDEPTH(r); break; + case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break; + case Shader::OPCODE_BEM: BEM(r, d, s0, s1, dst.index); break; + case Shader::OPCODE_PHASE: break; + case Shader::OPCODE_END: break; default: ASSERT(false); } - if(dst.type != Dst::PARAMETER_VOID && opcode != Op::OPCODE_TEXKILL) + if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL) { if(dst.shift > 0) { - if(dst.mask & 0x1) {d.r = AddSat(d.r, d.r); if(dst.shift > 1) d.r = AddSat(d.r, d.r); if(dst.shift > 2) d.r = AddSat(d.r, d.r);} - if(dst.mask & 0x2) {d.g = AddSat(d.g, d.g); if(dst.shift > 1) d.g = AddSat(d.g, d.g); if(dst.shift > 2) d.g = AddSat(d.g, d.g);} - if(dst.mask & 0x4) {d.b = AddSat(d.b, d.b); if(dst.shift > 1) d.b = AddSat(d.b, d.b); if(dst.shift > 2) d.b = AddSat(d.b, d.b);} - if(dst.mask & 0x8) {d.a = AddSat(d.a, d.a); if(dst.shift > 1) d.a = AddSat(d.a, d.a); if(dst.shift > 2) d.a = AddSat(d.a, d.a);} + if(dst.mask & 0x1) {d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x);} + if(dst.mask & 0x2) {d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y);} + if(dst.mask & 0x4) {d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z);} + if(dst.mask & 0x8) {d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift 
> 2) d.w = AddSat(d.w, d.w);} } else if(dst.shift < 0) { - if(dst.mask & 0x1) d.r = d.r >> -dst.shift; - if(dst.mask & 0x2) d.g = d.g >> -dst.shift; - if(dst.mask & 0x4) d.b = d.b >> -dst.shift; - if(dst.mask & 0x8) d.a = d.a >> -dst.shift; + if(dst.mask & 0x1) d.x = d.x >> -dst.shift; + if(dst.mask & 0x2) d.y = d.y >> -dst.shift; + if(dst.mask & 0x4) d.z = d.z >> -dst.shift; + if(dst.mask & 0x8) d.w = d.w >> -dst.shift; } if(dst.saturate) { - if(dst.mask & 0x1) {d.r = Min(d.r, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.r = Max(d.r, Short4(0x0000, 0x0000, 0x0000, 0x0000));} - if(dst.mask & 0x2) {d.g = Min(d.g, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.g = Max(d.g, Short4(0x0000, 0x0000, 0x0000, 0x0000));} - if(dst.mask & 0x4) {d.b = Min(d.b, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.b = Max(d.b, Short4(0x0000, 0x0000, 0x0000, 0x0000));} - if(dst.mask & 0x8) {d.a = Min(d.a, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.a = Max(d.a, Short4(0x0000, 0x0000, 0x0000, 0x0000));} + if(dst.mask & 0x1) {d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));} + if(dst.mask & 0x2) {d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));} + if(dst.mask & 0x4) {d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));} + if(dst.mask & 0x8) {d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));} } if(pairing) { - if(dst.mask & 0x1) dPairing.r = d.r; - if(dst.mask & 0x2) dPairing.g = d.g; - if(dst.mask & 0x4) dPairing.b = d.b; - if(dst.mask & 0x8) dPairing.a = d.a; + if(dst.mask & 0x1) dPairing.x = d.x; + if(dst.mask & 0x2) dPairing.y = d.y; + if(dst.mask & 0x4) dPairing.z = d.z; + if(dst.mask & 0x8) dPairing.w = d.w; } if(coissue) { - const Dst &dst = pixelShader->getInstruction(i - 1)->getDestinationParameter(); + const Dst &dst = shader->getInstruction(i - 1)->dst; writeDestination(r, dPairing, dst); } @@ -3700,47 +3735,60 @@ { r.enableIndex = 0; 
r.stackIndex = 0; - - for(int i = 0; i < pixelShader->getLength(); i++) + + bool out[4][4] = {false}; + + // Create all call site return blocks up front + for(int i = 0; i < shader->getLength(); i++) { - const ShaderInstruction *instruction = pixelShader->getInstruction(i); - Op::Opcode opcode = instruction->getOpcode(); + const Shader::Instruction *instruction = shader->getInstruction(i); + Shader::Opcode opcode = instruction->opcode; - // #ifndef NDEBUG // FIXME: Centralize debug output control - // pixelShader->printInstruction(i, "debug.txt"); - // #endif + if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) + { + const Dst &dst = instruction->dst; - if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB) + ASSERT(callRetBlock[dst.label].size() == dst.callSite); + callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); + } + } + + for(int i = 0; i < shader->getLength(); i++) + { + const Shader::Instruction *instruction = shader->getInstruction(i); + Shader::Opcode opcode = instruction->opcode; + + if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) { continue; } - const Dst &dst = instruction->getDestinationParameter(); - const Src &src0 = instruction->getSourceParameter(0); - const Src &src1 = instruction->getSourceParameter(1); - const Src &src2 = instruction->getSourceParameter(2); - const Src &src3 = instruction->getSourceParameter(3); + const Dst &dst = instruction->dst; + const Src &src0 = instruction->src[0]; + const Src &src1 = instruction->src[1]; + const Src &src2 = instruction->src[2]; + const Src &src3 = instruction->src[3]; - bool predicate = instruction->isPredicate(); - Control control = instruction->getControl(); + bool predicate = instruction->predicate; + Control control = instruction->control; bool pp = dst.partialPrecision; - bool project = instruction->isProject(); - bool bias = 
instruction->isBias(); + bool project = instruction->project; + bool bias = instruction->bias; - Color4f d; - Color4f s0; - Color4f s1; - Color4f s2; - Color4f s3; + Vector4f d; + Vector4f s0; + Vector4f s1; + Vector4f s2; + Vector4f s3; - if(opcode == Op::OPCODE_TEXKILL) + if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input { - if(dst.type == Dst::PARAMETER_TEXTURE) + if(dst.type == Shader::PARAMETER_TEXTURE) { - d.x = r.vx[2 + dst.index]; - d.y = r.vy[2 + dst.index]; - d.z = r.vz[2 + dst.index]; - d.w = r.vw[2 + dst.index]; + d.x = r.vf[2 + dst.index].x; + d.y = r.vf[2 + dst.index].y; + d.z = r.vf[2 + dst.index].z; + d.w = r.vf[2 + dst.index].w; } else { @@ -3748,120 +3796,212 @@ } } - if(src0.type != Src::PARAMETER_VOID) s0 = reg(r, src0); - if(src1.type != Src::PARAMETER_VOID) s1 = reg(r, src1); - if(src2.type != Src::PARAMETER_VOID) s2 = reg(r, src2); - if(src3.type != Src::PARAMETER_VOID) s3 = reg(r, src3); + if(src0.type != Shader::PARAMETER_VOID) s0 = reg(r, src0); + if(src1.type != Shader::PARAMETER_VOID) s1 = reg(r, src1); + if(src2.type != Shader::PARAMETER_VOID) s2 = reg(r, src2); + if(src3.type != Shader::PARAMETER_VOID) s3 = reg(r, src3); switch(opcode) { - case Op::OPCODE_PS_2_0: break; - case Op::OPCODE_PS_2_x: break; - case Op::OPCODE_PS_3_0: break; - case Op::OPCODE_DEF: break; - case Op::OPCODE_DCL: break; - case Op::OPCODE_NOP: break; - case Op::OPCODE_MOV: mov(d, s0); break; - case Op::OPCODE_ADD: add(d, s0, s1); break; - case Op::OPCODE_SUB: sub(d, s0, s1); break; - case Op::OPCODE_MUL: mul(d, s0, s1); break; - case Op::OPCODE_MAD: mad(d, s0, s1, s2); break; - case Op::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; - case Op::OPCODE_DP3: dp3(d, s0, s1); break; - case Op::OPCODE_DP4: dp4(d, s0, s1); break; - case Op::OPCODE_CMP: cmp(d, s0, s1, s2); break; - case Op::OPCODE_FRC: frc(d, s0); break; - case Op::OPCODE_EXP: exp(d, s0, pp); break; - case Op::OPCODE_LOG: log(d, s0, pp); break; - case Op::OPCODE_RCP: rcp(d, s0, pp); break; 
- case Op::OPCODE_RSQ: rsq(d, s0, pp); break; - case Op::OPCODE_MIN: min(d, s0, s1); break; - case Op::OPCODE_MAX: max(d, s0, s1); break; - case Op::OPCODE_LRP: lrp(d, s0, s1, s2); break; - case Op::OPCODE_POW: pow(d, s0, s1, pp); break; - case Op::OPCODE_CRS: crs(d, s0, s1); break; - case Op::OPCODE_NRM: nrm(d, s0, pp); break; - case Op::OPCODE_ABS: abs(d, s0); break; - case Op::OPCODE_SINCOS: sincos(d, s0, pp); break; - case Op::OPCODE_M4X4: M4X4(r, d, s0, src1); break; - case Op::OPCODE_M4X3: M4X3(r, d, s0, src1); break; - case Op::OPCODE_M3X4: M3X4(r, d, s0, src1); break; - case Op::OPCODE_M3X3: M3X3(r, d, s0, src1); break; - case Op::OPCODE_M3X2: M3X2(r, d, s0, src1); break; - case Op::OPCODE_TEX: TEXLD(r, d, s0, src1, project, bias); break; - case Op::OPCODE_TEXLDD: TEXLDD(r, d, s0, src1, s2, s3, project, bias); break; - case Op::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1, project, bias); break; - case Op::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; - case Op::OPCODE_DSX: DSX(d, s0); break; - case Op::OPCODE_DSY: DSY(d, s0); break; - case Op::OPCODE_BREAK: BREAK(r); break; - case Op::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break; - case Op::OPCODE_BREAKP: BREAKP(r, src0); break; - case Op::OPCODE_CALL: CALL(r, dst.index); break; - case Op::OPCODE_CALLNZ: CALLNZ(r, dst.index, src0); break; - case Op::OPCODE_ELSE: ELSE(r); break; - case Op::OPCODE_ENDIF: ENDIF(r); break; - case Op::OPCODE_ENDLOOP: ENDLOOP(r); break; - case Op::OPCODE_ENDREP: ENDREP(r); break; - case Op::OPCODE_IF: IF(r, src0); break; - case Op::OPCODE_IFC: IFC(r, s0, s1, control); break; - case Op::OPCODE_LABEL: LABEL(dst.index); break; - case Op::OPCODE_LOOP: LOOP(r, src1); break; - case Op::OPCODE_REP: REP(r, src0); break; - case Op::OPCODE_RET: RET(r); break; - case Op::OPCODE_SETP: setp(d, s0, s1, control); break; - case Op::OPCODE_END: break; + case Shader::OPCODE_PS_2_0: break; + case Shader::OPCODE_PS_2_x: break; + case Shader::OPCODE_PS_3_0: break; + case Shader::OPCODE_DEF: break; 
+ case Shader::OPCODE_DCL: break; + case Shader::OPCODE_NOP: break; + case Shader::OPCODE_MOV: mov(d, s0); break; + case Shader::OPCODE_F2B: f2b(d, s0); break; + case Shader::OPCODE_B2F: b2f(d, s0); break; + case Shader::OPCODE_ADD: add(d, s0, s1); break; + case Shader::OPCODE_SUB: sub(d, s0, s1); break; + case Shader::OPCODE_MUL: mul(d, s0, s1); break; + case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; + case Shader::OPCODE_DP1: dp1(d, s0, s1); break; + case Shader::OPCODE_DP2: dp2(d, s0, s1); break; + case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; + case Shader::OPCODE_DP3: dp3(d, s0, s1); break; + case Shader::OPCODE_DP4: dp4(d, s0, s1); break; + case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; + case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; + case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; + case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; + case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; + case Shader::OPCODE_FRC: frc(d, s0); break; + case Shader::OPCODE_TRUNC: trunc(d, s0); break; + case Shader::OPCODE_FLOOR: floor(d, s0); break; + case Shader::OPCODE_CEIL: ceil(d, s0); break; + case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; + case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; + case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; + case Shader::OPCODE_LOG2: log2(d, s0, pp); break; + case Shader::OPCODE_EXP: exp(d, s0, pp); break; + case Shader::OPCODE_LOG: log(d, s0, pp); break; + case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; + case Shader::OPCODE_DIV: div(d, s0, s1); break; + case Shader::OPCODE_MOD: mod(d, s0, s1); break; + case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; + case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; + case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; + case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; + case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; + case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; + case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); 
break; + case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; + case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; + case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; + case Shader::OPCODE_MIN: min(d, s0, s1); break; + case Shader::OPCODE_MAX: max(d, s0, s1); break; + case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; + case Shader::OPCODE_STEP: step(d, s0, s1); break; + case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; + case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; + case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; + case Shader::OPCODE_SGN: sgn(d, s0); break; + case Shader::OPCODE_CRS: crs(d, s0, s1); break; + case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; + case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; + case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; + case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; + case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; + case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; + case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; + case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; + case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; + case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; + case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; + case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; + case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; + case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; + case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; + case Shader::OPCODE_ABS: abs(d, s0); break; + case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; + case Shader::OPCODE_COS: cos(d, s0, pp); break; + case Shader::OPCODE_SIN: sin(d, s0, pp); break; + case Shader::OPCODE_TAN: tan(d, s0, pp); break; + case Shader::OPCODE_ACOS: acos(d, s0, pp); break; + case Shader::OPCODE_ASIN: asin(d, s0, pp); break; + case Shader::OPCODE_ATAN: atan(d, s0, pp); break; + case 
Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; + case Shader::OPCODE_M4X4: M4X4(r, d, s0, src1); break; + case Shader::OPCODE_M4X3: M4X3(r, d, s0, src1); break; + case Shader::OPCODE_M3X4: M3X4(r, d, s0, src1); break; + case Shader::OPCODE_M3X3: M3X3(r, d, s0, src1); break; + case Shader::OPCODE_M3X2: M3X2(r, d, s0, src1); break; + case Shader::OPCODE_TEX: TEXLD(r, d, s0, src1, project, bias); break; + case Shader::OPCODE_TEXLDD: TEXLDD(r, d, s0, src1, s2, s3, project, bias); break; + case Shader::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1, project, bias); break; + case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; + case Shader::OPCODE_DISCARD: DISCARD(r, cMask, instruction); break; + case Shader::OPCODE_DFDX: DFDX(d, s0); break; + case Shader::OPCODE_DFDY: DFDY(d, s0); break; + case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; + case Shader::OPCODE_BREAK: BREAK(r); break; + case Shader::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break; + case Shader::OPCODE_BREAKP: BREAKP(r, src0); break; + case Shader::OPCODE_CONTINUE: CONTINUE(r); break; + case Shader::OPCODE_TEST: TEST(); break; + case Shader::OPCODE_CALL: CALL(r, dst.label, dst.callSite); break; + case Shader::OPCODE_CALLNZ: CALLNZ(r, dst.label, dst.callSite, src0); break; + case Shader::OPCODE_ELSE: ELSE(r); break; + case Shader::OPCODE_ENDIF: ENDIF(r); break; + case Shader::OPCODE_ENDLOOP: ENDLOOP(r); break; + case Shader::OPCODE_ENDREP: ENDREP(r); break; + case Shader::OPCODE_ENDWHILE: ENDWHILE(r); break; + case Shader::OPCODE_IF: IF(r, src0); break; + case Shader::OPCODE_IFC: IFC(r, s0, s1, control); break; + case Shader::OPCODE_LABEL: LABEL(dst.index); break; + case Shader::OPCODE_LOOP: LOOP(r, src1); break; + case Shader::OPCODE_REP: REP(r, src0); break; + case Shader::OPCODE_WHILE: WHILE(r, src0); break; + case Shader::OPCODE_RET: RET(r); break; + case Shader::OPCODE_LEAVE: LEAVE(r); break; + case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; + case Shader::OPCODE_ALL: all(d.x, s0); 
break; + case Shader::OPCODE_ANY: any(d.x, s0); break; + case Shader::OPCODE_NOT: not(d, s0); break; + case Shader::OPCODE_OR: or(d.x, s0.x, s1.x); break; + case Shader::OPCODE_XOR: xor(d.x, s0.x, s1.x); break; + case Shader::OPCODE_AND: and(d.x, s0.x, s1.x); break; + case Shader::OPCODE_END: break; default: ASSERT(false); } - if(dst.type != Dst::PARAMETER_VOID && dst.type != Dst::PARAMETER_LABEL && opcode != Op::OPCODE_TEXKILL) + if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) { - if(dst.saturate) + if(dst.integer) { - if(dst.x) d.r = Max(d.r, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - if(dst.y) d.g = Max(d.g, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - if(dst.z) d.b = Max(d.b, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - if(dst.w) d.a = Max(d.a, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - - if(dst.x) d.r = Min(d.r, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - if(dst.y) d.g = Min(d.g, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - if(dst.z) d.b = Min(d.b, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - if(dst.w) d.a = Min(d.a, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + switch(opcode) + { + case Shader::OPCODE_DIV: + if(dst.x) d.x = Trunc(d.x); + if(dst.y) d.y = Trunc(d.y); + if(dst.z) d.z = Trunc(d.z); + if(dst.w) d.w = Trunc(d.w); + break; + default: + break; // No truncation to integer required when arguments are integer + } } - if(pixelShader->containsDynamicBranching()) + if(dst.saturate) { - Color4f pDst; // FIXME: Rename + if(dst.x) d.x = Max(d.x, Float4(0.0f)); + if(dst.y) d.y = Max(d.y, Float4(0.0f)); + if(dst.z) d.z = Max(d.z, Float4(0.0f)); + if(dst.w) d.w = Max(d.w, Float4(0.0f)); + + if(dst.x) d.x = Min(d.x, Float4(1.0f)); + if(dst.y) d.y = Min(d.y, Float4(1.0f)); + if(dst.z) d.z = Min(d.z, Float4(1.0f)); + if(dst.w) d.w = Min(d.w, Float4(1.0f)); + } + + if(shader->containsDynamicBranching()) + { + Vector4f pDst; // FIXME: Rename switch(dst.type) { - case Dst::PARAMETER_TEMP: - if(dst.x) pDst.x = r.rf[dst.index].x; - if(dst.y) pDst.y 
= r.rf[dst.index].y; - if(dst.z) pDst.z = r.rf[dst.index].z; - if(dst.w) pDst.w = r.rf[dst.index].w; + case Shader::PARAMETER_TEMP: + if(dst.rel.type == Shader::PARAMETER_VOID) + { + if(dst.x) pDst.x = r.rf[dst.index].x; + if(dst.y) pDst.y = r.rf[dst.index].y; + if(dst.z) pDst.z = r.rf[dst.index].z; + if(dst.w) pDst.w = r.rf[dst.index].w; + } + else + { + Int a = relativeAddress(r, dst); + + if(dst.x) pDst.x = r.rf[dst.index + a].x; + if(dst.y) pDst.y = r.rf[dst.index + a].y; + if(dst.z) pDst.z = r.rf[dst.index + a].z; + if(dst.w) pDst.w = r.rf[dst.index + a].w; + } break; - case Dst::PARAMETER_COLOROUT: + case Shader::PARAMETER_COLOROUT: + ASSERT(dst.rel.type == Shader::PARAMETER_VOID); if(dst.x) pDst.x = r.oC[dst.index].x; if(dst.y) pDst.y = r.oC[dst.index].y; if(dst.z) pDst.z = r.oC[dst.index].z; if(dst.w) pDst.w = r.oC[dst.index].w; break; - case Dst::PARAMETER_PREDICATE: + case Shader::PARAMETER_PREDICATE: if(dst.x) pDst.x = r.p0.x; if(dst.y) pDst.y = r.p0.y; if(dst.z) pDst.z = r.p0.z; if(dst.w) pDst.w = r.p0.w; break; - case Dst::PARAMETER_DEPTHOUT: + case Shader::PARAMETER_DEPTHOUT: pDst.x = r.oDepth; break; default: ASSERT(false); } - Int4 enable = r.enableStack[r.enableIndex] & r.enableBreak; + Int4 enable = enableMask(r, instruction); Int4 xEnable = enable; Int4 yEnable = enable; @@ -3870,14 +4010,14 @@ if(predicate) { - unsigned char pSwizzle = instruction->getPredicateSwizzle(); + unsigned char pSwizzle = instruction->predicateSwizzle; Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03]; Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03]; Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03]; Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03]; - if(!instruction->isPredicateNot()) + if(!instruction->predicateNot) { if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); @@ -3906,25 +4046,38 @@ switch(dst.type) { - case Dst::PARAMETER_TEMP: - if(dst.x) r.rf[dst.index].x = d.x; - if(dst.y) r.rf[dst.index].y = d.y; - 
if(dst.z) r.rf[dst.index].z = d.z; - if(dst.w) r.rf[dst.index].w = d.w; + case Shader::PARAMETER_TEMP: + if(dst.rel.type == Shader::PARAMETER_VOID) + { + if(dst.x) r.rf[dst.index].x = d.x; + if(dst.y) r.rf[dst.index].y = d.y; + if(dst.z) r.rf[dst.index].z = d.z; + if(dst.w) r.rf[dst.index].w = d.w; + } + else + { + Int a = relativeAddress(r, dst); + + if(dst.x) r.rf[dst.index + a].x = d.x; + if(dst.y) r.rf[dst.index + a].y = d.y; + if(dst.z) r.rf[dst.index + a].z = d.z; + if(dst.w) r.rf[dst.index + a].w = d.w; + } break; - case Dst::PARAMETER_COLOROUT: - if(dst.x) r.oC[dst.index].x = d.x; - if(dst.y) r.oC[dst.index].y = d.y; - if(dst.z) r.oC[dst.index].z = d.z; - if(dst.w) r.oC[dst.index].w = d.w; + case Shader::PARAMETER_COLOROUT: + ASSERT(dst.rel.type == Shader::PARAMETER_VOID); + if(dst.x) {r.oC[dst.index].x = d.x; out[dst.index][0] = true;} + if(dst.y) {r.oC[dst.index].y = d.y; out[dst.index][1] = true;} + if(dst.z) {r.oC[dst.index].z = d.z; out[dst.index][2] = true;} + if(dst.w) {r.oC[dst.index].w = d.w; out[dst.index][3] = true;} break; - case Dst::PARAMETER_PREDICATE: + case Shader::PARAMETER_PREDICATE: if(dst.x) r.p0.x = d.x; if(dst.y) r.p0.y = d.y; if(dst.z) r.p0.z = d.z; if(dst.w) r.p0.w = d.w; break; - case Dst::PARAMETER_DEPTHOUT: + case Shader::PARAMETER_DEPTHOUT: r.oDepth = d.x; break; default: @@ -3933,36 +4086,47 @@ } } - if(returns) + if(currentLabel != -1) { Nucleus::setInsertBlock(returnBlock); } + + for(int i = 0; i < 4; i++) + { + if((Format)state.targetFormat[i] != FORMAT_NULL) + { + if(!out[i][0]) r.oC[i].x = Float4(0.0f); + if(!out[i][1]) r.oC[i].y = Float4(0.0f); + if(!out[i][2]) r.oC[i].z = Float4(0.0f); + if(!out[i][3]) r.oC[i].w = Float4(0.0f); + } + } } - Short4 PixelRoutine::convertFixed12(Float4 &cf) + Short4 PixelRoutine::convertFixed12(RValue<Float4> cf) { - return RoundShort4(cf * Float4(0x1000, 0x1000, 0x1000, 0x1000)); + return RoundShort4(cf * Float4(0x1000)); } - void PixelRoutine::convertFixed12(Color4i &ci, Color4f &cf) + 
void PixelRoutine::convertFixed12(Vector4i &ci, Vector4f &cf) { - ci.r = convertFixed12(cf.r); - ci.g = convertFixed12(cf.g); - ci.b = convertFixed12(cf.b); - ci.a = convertFixed12(cf.a); + ci.x = convertFixed12(cf.x); + ci.y = convertFixed12(cf.y); + ci.z = convertFixed12(cf.z); + ci.w = convertFixed12(cf.w); } UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate) { - return UShort4(cf * Float4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF), saturate); + return UShort4(cf * Float4(0xFFFF), saturate); } - void PixelRoutine::convertFixed16(Color4i &ci, Color4f &cf, bool saturate) + void PixelRoutine::convertFixed16(Vector4i &ci, Vector4f &cf, bool saturate) { - ci.r = convertFixed16(cf.r, saturate); - ci.g = convertFixed16(cf.g, saturate); - ci.b = convertFixed16(cf.b, saturate); - ci.a = convertFixed16(cf.a, saturate); + ci.x = convertFixed16(cf.x, saturate); + ci.y = convertFixed16(cf.y, saturate); + ci.z = convertFixed16(cf.z, saturate); + ci.w = convertFixed16(cf.w, saturate); } Float4 PixelRoutine::convertSigned12(Short4 &ci) @@ -3970,75 +4134,75 @@ return Float4(ci) * Float4(1.0f / 0x0FFE); } - void PixelRoutine::convertSigned12(Color4f &cf, Color4i &ci) + void PixelRoutine::convertSigned12(Vector4f &cf, Vector4i &ci) { - cf.r = convertSigned12(ci.r); - cf.g = convertSigned12(ci.g); - cf.b = convertSigned12(ci.b); - cf.a = convertSigned12(ci.a); + cf.x = convertSigned12(ci.x); + cf.y = convertSigned12(ci.y); + cf.z = convertSigned12(ci.z); + cf.w = convertSigned12(ci.w); } Float4 PixelRoutine::convertUnsigned16(UShort4 ci) { - return Float4(ci) * Float4(1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF); + return Float4(ci) * Float4(1.0f / 0xFFFF); } - void PixelRoutine::sRGBtoLinear16_16(Registers &r, Color4i &c) + void PixelRoutine::sRGBtoLinear16_16(Registers &r, Vector4i &c) { - c.r = As<UShort4>(c.r) >> 4; - c.g = As<UShort4>(c.g) >> 4; - c.b = As<UShort4>(c.b) >> 4; + c.x = As<UShort4>(c.x) >> 4; + c.y = As<UShort4>(c.y) >> 4; + c.z = 
As<UShort4>(c.z) >> 4; sRGBtoLinear12_16(r, c); } - void PixelRoutine::sRGBtoLinear12_16(Registers &r, Color4i &c) + void PixelRoutine::sRGBtoLinear12_16(Registers &r, Vector4i &c) { Pointer<Byte> LUT = r.constants + OFFSET(Constants,sRGBtoLin12_16); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 0))), 0); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 1))), 1); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 2))), 2); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 3))), 3); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 0))), 0); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 1))), 1); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 2))), 2); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 3))), 3); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 0))), 0); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 1))), 1); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 2))), 2); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 3))), 3); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 
3); } - void PixelRoutine::linearToSRGB16_16(Registers &r, Color4i &c) + void PixelRoutine::linearToSRGB16_16(Registers &r, Vector4i &c) { - c.r = As<UShort4>(c.r) >> 4; - c.g = As<UShort4>(c.g) >> 4; - c.b = As<UShort4>(c.b) >> 4; + c.x = As<UShort4>(c.x) >> 4; + c.y = As<UShort4>(c.y) >> 4; + c.z = As<UShort4>(c.z) >> 4; linearToSRGB12_16(r, c); } - void PixelRoutine::linearToSRGB12_16(Registers &r, Color4i &c) + void PixelRoutine::linearToSRGB12_16(Registers &r, Vector4i &c) { Pointer<Byte> LUT = r.constants + OFFSET(Constants,linToSRGB12_16); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 0))), 0); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 1))), 1); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 2))), 2); - c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 3))), 3); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); + c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 0))), 0); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 1))), 1); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 2))), 2); - c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 3))), 3); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); + c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 0))), 0); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 1))), 1); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 2))), 2); - c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * 
Int(Extract(c.b, 3))), 3); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); + c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); } Float4 PixelRoutine::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) @@ -4057,31 +4221,31 @@ return Min(Max(linear, Float4(0.0f)), Float4(1.0f)); } - void PixelRoutine::MOV(Color4i &dst, Color4i &src0) + void PixelRoutine::MOV(Vector4i &dst, Vector4i &src0) { - dst.r = src0.x; - dst.g = src0.y; - dst.b = src0.z; - dst.a = src0.w; + dst.x = src0.x; + dst.y = src0.y; + dst.z = src0.z; + dst.w = src0.w; } - void PixelRoutine::ADD(Color4i &dst, Color4i &src0, Color4i &src1) + void PixelRoutine::ADD(Vector4i &dst, Vector4i &src0, Vector4i &src1) { - dst.r = AddSat(src0.x, src1.x); - dst.g = AddSat(src0.y, src1.y); - dst.b = AddSat(src0.z, src1.z); - dst.a = AddSat(src0.w, src1.w); + dst.x = AddSat(src0.x, src1.x); + dst.y = AddSat(src0.y, src1.y); + dst.z = AddSat(src0.z, src1.z); + dst.w = AddSat(src0.w, src1.w); } - void PixelRoutine::SUB(Color4i &dst, Color4i &src0, Color4i &src1) + void PixelRoutine::SUB(Vector4i &dst, Vector4i &src0, Vector4i &src1) { - dst.r = SubSat(src0.x, src1.x); - dst.g = SubSat(src0.y, src1.y); - dst.b = SubSat(src0.z, src1.z); - dst.a = SubSat(src0.w, src1.w); + dst.x = SubSat(src0.x, src1.x); + dst.y = SubSat(src0.y, src1.y); + dst.z = SubSat(src0.z, src1.z); + dst.w = SubSat(src0.w, src1.w); } - void PixelRoutine::MAD(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2) + void PixelRoutine::MAD(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2) { // FIXME: Long fixed-point multiply fixup {dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);} @@ -4090,7 +4254,7 @@ 
{dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);} } - void PixelRoutine::MUL(Color4i &dst, Color4i &src0, Color4i &src1) + void PixelRoutine::MUL(Vector4i &dst, Vector4i &src0, Vector4i &src1) { // FIXME: Long fixed-point multiply fixup {dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x);} @@ -4099,7 +4263,7 @@ {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w);} } - void PixelRoutine::DP3(Color4i &dst, Color4i &src0, Color4i &src1) + void PixelRoutine::DP3(Vector4i &dst, Vector4i &src0, Vector4i &src1) { Short4 t0; Short4 t1; @@ -4111,13 +4275,13 @@ t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t0 = AddSat(t0, t1); - dst.r = t0; - dst.g = t0; - dst.b = t0; - dst.a = t0; + dst.x = t0; + dst.y = t0; + dst.z = t0; + dst.w = t0; } - void PixelRoutine::DP4(Color4i &dst, Color4i &src0, Color4i &src1) + void PixelRoutine::DP4(Vector4i &dst, Vector4i &src0, Vector4i &src1) { Short4 t0; Short4 t1; @@ -4131,13 +4295,13 @@ t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t0 = AddSat(t0, t1); - dst.r = t0; - dst.g = t0; - dst.b = t0; - dst.a = t0; + dst.x = t0; + dst.y = t0; + dst.z = t0; + dst.w = t0; } - void PixelRoutine::LRP(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2) + void PixelRoutine::LRP(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2) { // FIXME: Long fixed-point multiply fixup {dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = 
AddSat(dst.x, src2.x);} @@ -4146,7 +4310,7 @@ {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);} } - void PixelRoutine::TEXCOORD(Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate) + void PixelRoutine::TEXCOORD(Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate) { Float4 uw; Float4 vw; @@ -4154,41 +4318,41 @@ if(state.interpolant[2 + coordinate].component & 0x01) { - uw = Max(u, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - uw = Min(uw, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - dst.r = convertFixed12(uw); + uw = Max(u, Float4(0.0f)); + uw = Min(uw, Float4(1.0f)); + dst.x = convertFixed12(uw); } else { - dst.r = Short4(0x0000, 0x0000, 0x0000, 0x0000); + dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); } if(state.interpolant[2 + coordinate].component & 0x02) { - vw = Max(v, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - vw = Min(vw, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - dst.g = convertFixed12(vw); + vw = Max(v, Float4(0.0f)); + vw = Min(vw, Float4(1.0f)); + dst.y = convertFixed12(vw); } else { - dst.g = Short4(0x0000, 0x0000, 0x0000, 0x0000); + dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); } if(state.interpolant[2 + coordinate].component & 0x04) { - sw = Max(s, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - sw = Min(sw, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - dst.b = convertFixed12(sw); + sw = Max(s, Float4(0.0f)); + sw = Min(sw, Float4(1.0f)); + dst.z = convertFixed12(sw); } else { - dst.b = Short4(0x0000, 0x0000, 0x0000, 0x0000); + dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); } - dst.a = Short4(0x1000, 0x1000, 0x1000, 0x1000); + dst.w = Short4(0x1000); } - void PixelRoutine::TEXCRD(Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project) + void PixelRoutine::TEXCRD(Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project) { Float4 uw = u; Float4 vw = v; @@ -4202,68 +4366,68 @@ 
if(state.interpolant[2 + coordinate].component & 0x01) { - uw *= Float4(0x1000, 0x1000, 0x1000, 0x1000); - uw = Max(uw, Float4(-0x8000, -0x8000, -0x8000, -0x8000)); - uw = Min(uw, Float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF)); - dst.r = RoundShort4(uw); + uw *= Float4(0x1000); + uw = Max(uw, Float4(-0x8000)); + uw = Min(uw, Float4(0x7FFF)); + dst.x = RoundShort4(uw); } else { - dst.r = Short4(0x0000, 0x0000, 0x0000, 0x0000); + dst.x = Short4(0x0000); } if(state.interpolant[2 + coordinate].component & 0x02) { - vw *= Float4(0x1000, 0x1000, 0x1000, 0x1000); - vw = Max(vw, Float4(-0x8000, -0x8000, -0x8000, -0x8000)); - vw = Min(vw, Float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF)); - dst.g = RoundShort4(vw); + vw *= Float4(0x1000); + vw = Max(vw, Float4(-0x8000)); + vw = Min(vw, Float4(0x7FFF)); + dst.y = RoundShort4(vw); } else { - dst.g = Short4(0x0000, 0x0000, 0x0000, 0x0000); + dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); } if(state.interpolant[2 + coordinate].component & 0x04) { - sw *= Float4(0x1000, 0x1000, 0x1000, 0x1000); - sw = Max(sw, Float4(-0x8000, -0x8000, -0x8000, -0x8000)); - sw = Min(sw, Float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF)); - dst.b = RoundShort4(sw); + sw *= Float4(0x1000); + sw = Max(sw, Float4(-0x8000)); + sw = Min(sw, Float4(0x7FFF)); + dst.z = RoundShort4(sw); } else { - dst.b = Short4(0x0000, 0x0000, 0x0000, 0x0000); + dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); } } - void PixelRoutine::TEXDP3(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src) + void PixelRoutine::TEXDP3(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, Vector4i &src) { TEXM3X3PAD(r, u, v, s, src, 0, false); - Short4 t0 = RoundShort4(r.u_ * Float4(0x1000, 0x1000, 0x1000, 0x1000)); + Short4 t0 = RoundShort4(r.u_ * Float4(0x1000)); - dst.r = t0; - dst.g = t0; - dst.b = t0; - dst.a = t0; + dst.x = t0; + dst.y = t0; + dst.z = t0; + dst.w = t0; } - void PixelRoutine::TEXDP3TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, 
Color4i &src0) + void PixelRoutine::TEXDP3TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0) { TEXM3X3PAD(r, u, v, s, src0, 0, false); - r.v_ = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.w_ = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.v_ = Float4(0.0f); + r.w_ = Float4(0.0f); sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_); } void PixelRoutine::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s) { - Int kill = SignMask(CmpNLT(u, Float4(0, 0, 0, 0))) & - SignMask(CmpNLT(v, Float4(0, 0, 0, 0))) & - SignMask(CmpNLT(s, Float4(0, 0, 0, 0))); + Int kill = SignMask(CmpNLT(u, Float4(0.0f))) & + SignMask(CmpNLT(v, Float4(0.0f))) & + SignMask(CmpNLT(s, Float4(0.0f))); for(unsigned int q = 0; q < state.multiSample; q++) { @@ -4271,9 +4435,9 @@ } } - void PixelRoutine::TEXKILL(Int cMask[4], Color4i &src) + void PixelRoutine::TEXKILL(Int cMask[4], Vector4i &src) { - Short4 test = src.r | src.g | src.b; + Short4 test = src.x | src.y | src.z; Int kill = SignMask(Pack(test, test)) ^ 0x0000000F; for(unsigned int q = 0; q < state.multiSample; q++) @@ -4282,24 +4446,24 @@ } } - void PixelRoutine::TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project) + void PixelRoutine::TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project) { sampleTexture(r, dst, sampler, u, v, s, s, project); } - void PixelRoutine::TEXLD(Registers &r, Color4i &dst, Color4i &src, int sampler, bool project) + void PixelRoutine::TEXLD(Registers &r, Vector4i &dst, Vector4i &src, int sampler, bool project) { - Float4 u = Float4(src.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 v = Float4(src.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 s = Float4(src.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE); + Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE); + Float4 s = 
Float4(src.z) * Float4(1.0f / 0x0FFE); sampleTexture(r, dst, sampler, u, v, s, s, project); } - void PixelRoutine::TEXBEM(Registers &r, Color4i &dst, Color4i &src, Float4 &u, Float4 &v, Float4 &s, int stage) + void PixelRoutine::TEXBEM(Registers &r, Vector4i &dst, Vector4i &src, Float4 &u, Float4 &v, Float4 &s, int stage) { - Float4 du = Float4(src.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 dv = Float4(src.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE); + Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE); Float4 du2 = du; Float4 dv2 = dv; @@ -4317,10 +4481,10 @@ sampleTexture(r, dst, stage, u_, v_, s, s); } - void PixelRoutine::TEXBEML(Registers &r, Color4i &dst, Color4i &src, Float4 &u, Float4 &v, Float4 &s, int stage) + void PixelRoutine::TEXBEML(Registers &r, Vector4i &dst, Vector4i &src, Float4 &u, Float4 &v, Float4 &s, int stage) { - Float4 du = Float4(src.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 dv = Float4(src.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE); + Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE); Float4 du2 = du; Float4 dv2 = dv; @@ -4339,46 +4503,46 @@ Short4 L; - L = src.b; + L = src.z; L = MulHigh(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4))); L = L << 4; L = AddSat(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4))); L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000)); - L = Min(L, Short4(0x1000, 0x1000, 0x1000, 0x1000)); + L = Min(L, Short4(0x1000)); - dst.r = MulHigh(dst.r, L); dst.r = dst.r << 4; - dst.g = MulHigh(dst.g, L); dst.g = dst.g << 4; - dst.b = MulHigh(dst.b, L); dst.b = dst.b << 4; + dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4; + dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4; + dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4; } 
- void PixelRoutine::TEXREG2AR(Registers &r, Color4i &dst, Color4i &src0, int stage) + void PixelRoutine::TEXREG2AR(Registers &r, Vector4i &dst, Vector4i &src0, int stage) { - Float4 u = Float4(src0.a) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 v = Float4(src0.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 s = Float4(src0.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE); + Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE); + Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE); sampleTexture(r, dst, stage, u, v, s, s); } - void PixelRoutine::TEXREG2GB(Registers &r, Color4i &dst, Color4i &src0, int stage) + void PixelRoutine::TEXREG2GB(Registers &r, Vector4i &dst, Vector4i &src0, int stage) { - Float4 u = Float4(src0.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 v = Float4(src0.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE); + Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE); Float4 s = v; sampleTexture(r, dst, stage, u, v, s, s); } - void PixelRoutine::TEXREG2RGB(Registers &r, Color4i &dst, Color4i &src0, int stage) + void PixelRoutine::TEXREG2RGB(Registers &r, Vector4i &dst, Vector4i &src0, int stage) { - Float4 u = Float4(src0.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 v = Float4(src0.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - Float4 s = Float4(src0.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE); + Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE); + Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE); sampleTexture(r, dst, stage, u, v, s, s); } - void PixelRoutine::TEXM3X2DEPTH(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src, bool 
signedScaling) + void PixelRoutine::TEXM3X2DEPTH(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, Vector4i &src, bool signedScaling) { TEXM3X2PAD(r, u, v, s, src, 1, signedScaling); @@ -4388,44 +4552,44 @@ r.oDepth = r.u_; } - void PixelRoutine::TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, int component, bool signedScaling) + void PixelRoutine::TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4i &src0, int component, bool signedScaling) { TEXM3X3PAD(r, u, v, s, src0, component, signedScaling); } - void PixelRoutine::TEXM3X2TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, bool signedScaling) + void PixelRoutine::TEXM3X2TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0, bool signedScaling) { TEXM3X2PAD(r, u, v, s, src0, 1, signedScaling); - r.w_ = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.w_ = Float4(0.0f); sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_); } - void PixelRoutine::TEXM3X3(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, bool signedScaling) + void PixelRoutine::TEXM3X3(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, Vector4i &src0, bool signedScaling) { TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling); - dst.r = RoundShort4(r.u_ * Float4(0x1000, 0x1000, 0x1000, 0x1000)); - dst.g = RoundShort4(r.v_ * Float4(0x1000, 0x1000, 0x1000, 0x1000)); - dst.b = RoundShort4(r.w_ * Float4(0x1000, 0x1000, 0x1000, 0x1000)); - dst.a = Short4(0x1000, 0x1000, 0x1000, 0x1000); + dst.x = RoundShort4(r.u_ * Float4(0x1000)); + dst.y = RoundShort4(r.v_ * Float4(0x1000)); + dst.z = RoundShort4(r.w_ * Float4(0x1000)); + dst.w = Short4(0x1000); } - void PixelRoutine::TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, int component, bool signedScaling) + void PixelRoutine::TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4i &src0, int component, bool signedScaling) { 
if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers? { - r.U = Float4(src0.r); - r.V = Float4(src0.g); - r.W = Float4(src0.b); + r.U = Float4(src0.x); + r.V = Float4(src0.y); + r.W = Float4(src0.z); previousScaling = signedScaling; } Float4 x = r.U * u + r.V * v + r.W * s; - x *= Float4(1.0f / 0x1000, 1.0f / 0x1000, 1.0f / 0x1000, 1.0f / 0x1000); + x *= Float4(1.0f / 0x1000); switch(component) { @@ -4436,15 +4600,15 @@ } } - void PixelRoutine::TEXM3X3SPEC(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, Color4i &src1) + void PixelRoutine::TEXM3X3SPEC(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0, Vector4i &src1) { TEXM3X3PAD(r, u, v, s, src0, 2, false); Float4 E[3]; // Eye vector - E[0] = Float4(src1.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - E[1] = Float4(src1.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); - E[2] = Float4(src1.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE); + E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE); + E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE); + E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE); // Reflection Float4 u__; @@ -4473,22 +4637,22 @@ sampleTexture(r, dst, stage, u__, v__, w__, w__); } - void PixelRoutine::TEXM3X3TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, bool signedScaling) + void PixelRoutine::TEXM3X3TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0, bool signedScaling) { TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling); sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_); } - void PixelRoutine::TEXM3X3VSPEC(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0) + void PixelRoutine::TEXM3X3VSPEC(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0) { TEXM3X3PAD(r, u, v, s, src0, 2, 
false); Float4 E[3]; // Eye vector - E[0] = r.vw[2 + stage - 2]; - E[1] = r.vw[2 + stage - 1]; - E[2] = r.vw[2 + stage - 0]; + E[0] = r.vf[2 + stage - 2].w; + E[1] = r.vf[2 + stage - 1].w; + E[2] = r.vf[2 + stage - 0].w; // Reflection Float4 u__; @@ -4519,8 +4683,8 @@ void PixelRoutine::TEXDEPTH(Registers &r) { - r.u_ = Float4(r.ri[5].r); - r.v_ = Float4(r.ri[5].g); + r.u_ = Float4(r.ri[5].x); + r.v_ = Float4(r.ri[5].y); // z / w r.u_ *= Rcp_pp(r.v_); // FIXME: Set result to 1.0 when division by zero @@ -4528,68 +4692,68 @@ r.oDepth = r.u_; } - void PixelRoutine::CND(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2) + void PixelRoutine::CND(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2) { - {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.r = t0;}; - {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.g = t0;}; - {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.b = t0;}; - {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.a = t0;}; + {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0;}; + {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0;}; + {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0;}; + {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = 
src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0;}; } - void PixelRoutine::CMP(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2) + void PixelRoutine::CMP(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2) { - {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.r = t0;}; - {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.g = t0;}; - {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.b = t0;}; - {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.a = t0;}; + {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0;}; + {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0;}; + {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0;}; + {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0;}; } - void PixelRoutine::BEM(Registers &r, Color4i &dst, Color4i &src0, Color4i &src1, int stage) + void PixelRoutine::BEM(Registers &r, Vector4i &dst, Vector4i &src0, Vector4i &src1, int stage) { Short4 t0; Short4 t1; - // dst.r = src0.r + BUMPENVMAT00(stage) * src1.r + BUMPENVMAT10(stage) * src1.g + // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard. 
t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard. t0 = AddSat(t0, t1); t0 = AddSat(t0, src0.x); - dst.r = t0; + dst.x = t0; - // dst.g = src0.g + BUMPENVMAT01(stage) * src1.r + BUMPENVMAT11(stage) * src1.g + // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard. t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard. t0 = AddSat(t0, t1); t0 = AddSat(t0, src0.y); - dst.g = t0; + dst.y = t0; } - void PixelRoutine::M3X2(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) + void PixelRoutine::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); dst.x = dot3(src0, row0); dst.y = dot3(src0, row1); } - void PixelRoutine::M3X3(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) + void PixelRoutine::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); dst.x = dot3(src0, row0); dst.y = dot3(src0, row1); dst.z = dot3(src0, row2); } - void PixelRoutine::M3X4(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) + void PixelRoutine::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); - Color4f row3 = reg(r, src1, 3); + Vector4f row0 = 
reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); + Vector4f row3 = reg(r, src1, 3); dst.x = dot3(src0, row0); dst.y = dot3(src0, row1); @@ -4597,23 +4761,23 @@ dst.w = dot3(src0, row3); } - void PixelRoutine::M4X3(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) + void PixelRoutine::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); dst.x = dot4(src0, row0); dst.y = dot4(src0, row1); dst.z = dot4(src0, row2); } - void PixelRoutine::M4X4(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) + void PixelRoutine::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); - Color4f row3 = reg(r, src1, 3); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); + Vector4f row3 = reg(r, src1, 3); dst.x = dot4(src0, row0); dst.y = dot4(src0, row1); @@ -4621,11 +4785,10 @@ dst.w = dot4(src0, row3); } - void PixelRoutine::TEXLD(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, bool project, bool bias) + void PixelRoutine::TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) { - Color4f tmp; - - sampleTexture(r, tmp, src1.index, src0.u, src0.v, src0.s, src0.t, src0, src0, project, bias); + Vector4f tmp; + sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias); dst.x = tmp[(src1.swizzle >> 0) & 0x3]; dst.y = tmp[(src1.swizzle >> 2) & 0x3]; @@ -4633,11 +4796,10 @@ dst.w = tmp[(src1.swizzle >> 6) & 0x3]; } - void PixelRoutine::TEXLDD(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, Color4f &src2, Color4f &src3, bool project, bool bias) + void 
PixelRoutine::TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project, bool bias) { - Color4f tmp; - - sampleTexture(r, tmp, src1.index, src0.u, src0.v, src0.s, src0.t, src2, src3, project, bias, true); + Vector4f tmp; + sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, bias, true); dst.x = tmp[(src1.swizzle >> 0) & 0x3]; dst.y = tmp[(src1.swizzle >> 2) & 0x3]; @@ -4645,11 +4807,10 @@ dst.w = tmp[(src1.swizzle >> 6) & 0x3]; } - void PixelRoutine::TEXLDL(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, bool project, bool bias) + void PixelRoutine::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) { - Color4f tmp; - - sampleTexture(r, tmp, src1.index, src0.u, src0.v, src0.s, src0.t, src0, src0, project, bias, false, true); + Vector4f tmp; + sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias, false, true); dst.x = tmp[(src1.swizzle >> 0) & 0x3]; dst.y = tmp[(src1.swizzle >> 2) & 0x3]; @@ -4657,35 +4818,69 @@ dst.w = tmp[(src1.swizzle >> 6) & 0x3]; } - void PixelRoutine::TEXKILL(Int cMask[4], Color4f &src, unsigned char mask) + void PixelRoutine::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask) { Int kill = -1; - if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0, 0, 0, 0))); - if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0, 0, 0, 0))); - if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0, 0, 0, 0))); - if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0, 0, 0, 0))); + if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f))); + if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f))); + if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f))); + if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f))); + + // FIXME: Dynamic branching affects TEXKILL? 
+ // if(shader->containsDynamicBranching()) + // { + // kill = ~SignMask(enableMask(r)); + // } for(unsigned int q = 0; q < state.multiSample; q++) { cMask[q] &= kill; } + + // FIXME: Branch to end of shader if all killed? } - void PixelRoutine::DSX(Color4f &dst, Color4f &src) + void PixelRoutine::DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction) { - dst.x = src.x.yyyy - src.x.xxxx; - dst.y = src.y.yyyy - src.y.xxxx; - dst.z = src.z.yyyy - src.z.xxxx; - dst.w = src.w.yyyy - src.w.xxxx; + Int kill = 0; + + if(shader->containsDynamicBranching()) + { + kill = ~SignMask(enableMask(r, instruction)); + } + + for(unsigned int q = 0; q < state.multiSample; q++) + { + cMask[q] &= kill; + } + + // FIXME: Branch to end of shader if all killed? } - void PixelRoutine::DSY(Color4f &dst, Color4f &src) + void PixelRoutine::DFDX(Vector4f &dst, Vector4f &src) { - dst.x = src.x.zzzz - src.x.xxxx; - dst.y = src.y.zzzz - src.y.xxxx; - dst.z = src.z.zzzz - src.z.xxxx; - dst.w = src.w.zzzz - src.w.xxxx; + dst.x = src.x.yyww - src.x.xxzz; + dst.y = src.y.yyww - src.y.xxzz; + dst.z = src.z.yyww - src.z.xxzz; + dst.w = src.w.yyww - src.w.xxzz; + } + + void PixelRoutine::DFDY(Vector4f &dst, Vector4f &src) + { + dst.x = src.x.zwzw - src.x.xyxy; + dst.y = src.y.zwzw - src.y.xyxy; + dst.z = src.z.zwzw - src.z.xyxy; + dst.w = src.w.zwzw - src.w.xyxy; + } + + void PixelRoutine::FWIDTH(Vector4f &dst, Vector4f &src) + { + // abs(dFdx(src)) + abs(dFdy(src)), evaluated independently for each component (same swizzles as DFDX/DFDY) + dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy); + dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy); + dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy); + dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy); } void PixelRoutine::BREAK(Registers &r) @@ -4695,6 +4890,7 @@ if(breakDepth == 0) { + r.enableIndex = r.enableIndex - breakDepth; Nucleus::createBr(endBlock); } else @@ -4702,49 +4898,47 @@ r.enableBreak = r.enableBreak & 
~r.enableStack[r.enableIndex]; Bool allBreak = SignMask(r.enableBreak) == 0x0; + r.enableIndex = r.enableIndex - breakDepth; branch(allBreak, endBlock, deadBlock); } Nucleus::setInsertBlock(deadBlock); + r.enableIndex = r.enableIndex + breakDepth; } - void PixelRoutine::BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control control) + void PixelRoutine::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control) { Int4 condition; switch(control) { - case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; - case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; - case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; - case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; - case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; - case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; + case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; + case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; + case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; + case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; + case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; + case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; default: ASSERT(false); } - condition &= r.enableStack[r.enableIndex]; - - llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); - llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; - - r.enableBreak = r.enableBreak & ~condition; - Bool allBreak = SignMask(r.enableBreak) == 0x0; - - branch(allBreak, endBlock, continueBlock); - Nucleus::setInsertBlock(continueBlock); + BREAK(r, condition); } void PixelRoutine::BREAKP(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC { Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); - if(predicateRegister.modifier == Src::MODIFIER_NOT) + if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition 
= ~condition; } + BREAK(r, condition); + } + + void PixelRoutine::BREAK(Registers &r, Int4 &condition) + { condition &= r.enableStack[r.enableIndex]; llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); @@ -4753,44 +4947,61 @@ r.enableBreak = r.enableBreak & ~condition; Bool allBreak = SignMask(r.enableBreak) == 0x0; + r.enableIndex = r.enableIndex - breakDepth; branch(allBreak, endBlock, continueBlock); + Nucleus::setInsertBlock(continueBlock); + r.enableIndex = r.enableIndex + breakDepth; } - void PixelRoutine::CALL(Registers &r, int labelIndex) + void PixelRoutine::CONTINUE(Registers &r) + { + r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex]; + } + + void PixelRoutine::TEST() + { + whileTest = true; + } + + void PixelRoutine::CALL(Registers &r, int labelIndex, int callSiteIndex) { if(!labelBlock[labelIndex]) { labelBlock[labelIndex] = Nucleus::createBasicBlock(); } - llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); - callRetBlock.push_back(retBlock); + if(callRetBlock[labelIndex].size() > 1) + { + r.callStack[r.stackIndex++] = UInt(callSiteIndex); + } - r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME + Int4 restoreLeave = r.enableLeave; Nucleus::createBr(labelBlock[labelIndex]); - Nucleus::setInsertBlock(retBlock); + Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); + + r.enableLeave = restoreLeave; } - void PixelRoutine::CALLNZ(Registers &r, int labelIndex, const Src &src) + void PixelRoutine::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src) { - if(src.type == Src::PARAMETER_CONSTBOOL) + if(src.type == Shader::PARAMETER_CONSTBOOL) { - CALLNZb(r, labelIndex, src); + CALLNZb(r, labelIndex, callSiteIndex, src); } - else if(src.type == Src::PARAMETER_PREDICATE) + else if(src.type == Shader::PARAMETER_PREDICATE) { - CALLNZp(r, labelIndex, src); + CALLNZp(r, labelIndex, callSiteIndex, src); } else ASSERT(false); } - void PixelRoutine::CALLNZb(Registers &r, int 
labelIndex, const Src &boolRegister) + void PixelRoutine::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister) { Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0)); // FIXME - if(boolRegister.modifier == Src::MODIFIER_NOT) + if(boolRegister.modifier == Shader::MODIFIER_NOT) { condition = !condition; } @@ -4800,20 +5011,24 @@ labelBlock[labelIndex] = Nucleus::createBasicBlock(); } - llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); - callRetBlock.push_back(retBlock); + if(callRetBlock[labelIndex].size() > 1) + { + r.callStack[r.stackIndex++] = UInt(callSiteIndex); + } - r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME + Int4 restoreLeave = r.enableLeave; - branch(condition, labelBlock[labelIndex], retBlock); - Nucleus::setInsertBlock(retBlock); + branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); + Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); + + r.enableLeave = restoreLeave; } - void PixelRoutine::CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister) + void PixelRoutine::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister) { Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); - if(predicateRegister.modifier == Src::MODIFIER_NOT) + if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition = ~condition; } @@ -4825,20 +5040,21 @@ labelBlock[labelIndex] = Nucleus::createBasicBlock(); } - llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); - callRetBlock.push_back(retBlock); - - r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME + if(callRetBlock[labelIndex].size() > 1) + { + r.callStack[r.stackIndex++] = UInt(callSiteIndex); + } r.enableIndex++; r.enableStack[r.enableIndex] = condition; + Int4 restoreLeave = r.enableLeave; - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; - - branch(notAllFalse, 
labelBlock[labelIndex], retBlock); - Nucleus::setInsertBlock(retBlock); + Bool notAllFalse = SignMask(condition) != 0; + branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); + Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); r.enableIndex--; + r.enableLeave = restoreLeave; } void PixelRoutine::ELSE(Registers &r) @@ -4851,7 +5067,7 @@ if(isConditionalIf[ifDepth]) { Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1]; - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; + Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, falseBlock, endBlock); @@ -4884,20 +5100,6 @@ } } - void PixelRoutine::ENDREP(Registers &r) - { - loopRepDepth--; - - llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; - llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; - - Nucleus::createBr(testBlock); - Nucleus::setInsertBlock(endBlock); - - r.loopDepth--; - r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - } - void PixelRoutine::ENDLOOP(Registers &r) { loopRepDepth--; @@ -4914,26 +5116,61 @@ r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); } + void PixelRoutine::ENDREP(Registers &r) + { + loopRepDepth--; + + llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; + llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; + + Nucleus::createBr(testBlock); + Nucleus::setInsertBlock(endBlock); + + r.loopDepth--; + r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + void PixelRoutine::ENDWHILE(Registers &r) + { + loopRepDepth--; + + llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; + llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; + + Nucleus::createBr(testBlock); + Nucleus::setInsertBlock(endBlock); + + r.enableIndex--; + r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + whileTest = false; + } + void PixelRoutine::IF(Registers &r, const Src &src) { - 
if(src.type == Src::PARAMETER_CONSTBOOL) + if(src.type == Shader::PARAMETER_CONSTBOOL) { IFb(r, src); } - else if(src.type == Src::PARAMETER_PREDICATE) + else if(src.type == Shader::PARAMETER_PREDICATE) { IFp(r, src); } - else ASSERT(false); + else + { + Int4 condition = As<Int4>(reg(r, src).x); + IF(r, condition); + } } void PixelRoutine::IFb(Registers &r, const Src &boolRegister) { + ASSERT(ifDepth < 24 + 4); + Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0)); // FIXME - if(boolRegister.modifier == Src::MODIFIER_NOT) + if(boolRegister.modifier == Shader::MODIFIER_NOT) { - condition = !condition; + condition = !condition; } llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); @@ -4947,50 +5184,39 @@ ifDepth++; } - void PixelRoutine::IFp(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with IFC + void PixelRoutine::IFp(Registers &r, const Src &predicateRegister) { Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); - if(predicateRegister.modifier == Src::MODIFIER_NOT) + if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition = ~condition; } - condition &= r.enableStack[r.enableIndex]; - - r.enableIndex++; - r.enableStack[r.enableIndex] = condition; - - llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); - llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); - - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; - - branch(notAllFalse, trueBlock, falseBlock); - - isConditionalIf[ifDepth] = true; - ifFalseBlock[ifDepth] = falseBlock; - - ifDepth++; - breakDepth++; + IF(r, condition); } - void PixelRoutine::IFC(Registers &r, Color4f &src0, Color4f &src1, Control control) + void PixelRoutine::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control) { Int4 condition; switch(control) { - case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; - case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; - case 
Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; - case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; - case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; - case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; + case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; + case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; + case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; + case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; + case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; + case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; default: ASSERT(false); } + IF(r, condition); + } + + void PixelRoutine::IF(Registers &r, Int4 &condition) + { condition &= r.enableStack[r.enableIndex]; r.enableIndex++; @@ -4999,7 +5225,7 @@ llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; + Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, trueBlock, falseBlock); @@ -5012,7 +5238,13 @@ void PixelRoutine::LABEL(int labelIndex) { + if(!labelBlock[labelIndex]) + { + labelBlock[labelIndex] = Nucleus::createBasicBlock(); + } + Nucleus::setInsertBlock(labelBlock[labelIndex]); + currentLabel = labelIndex; } void PixelRoutine::LOOP(Registers &r, const Src &integerRegister) @@ -5075,27 +5307,73 @@ breakDepth = 0; } + void PixelRoutine::WHILE(Registers &r, const Src &temporaryRegister) + { + r.enableIndex++; + + llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); + llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); + llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); + + loopRepTestBlock[loopRepDepth] = testBlock; + loopRepEndBlock[loopRepDepth] = endBlock; + + Int4 restoreBreak = r.enableBreak; + Int4 restoreContinue = r.enableContinue; + + // FIXME: jump(testBlock) + 
Nucleus::createBr(testBlock); + Nucleus::setInsertBlock(testBlock); + r.enableContinue = restoreContinue; + + Vector4f &src = reg(r, temporaryRegister); + Int4 condition = As<Int4>(src.x); + condition &= r.enableStack[r.enableIndex - 1]; + r.enableStack[r.enableIndex] = condition; + + Bool notAllFalse = SignMask(condition) != 0; + branch(notAllFalse, loopBlock, endBlock); + + Nucleus::setInsertBlock(endBlock); + r.enableBreak = restoreBreak; + + Nucleus::setInsertBlock(loopBlock); + + loopRepDepth++; + breakDepth = 0; + } + void PixelRoutine::RET(Registers &r) { - if(!returns) + if(currentLabel == -1) { returnBlock = Nucleus::createBasicBlock(); Nucleus::createBr(returnBlock); - - returns = true; } else { - // FIXME: Encapsulate - UInt index = r.callStack[--r.stackIndex]; - llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); - llvm::Value *value = Nucleus::createLoad(index.address); - llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock.size()); - for(unsigned int i = 0; i < callRetBlock.size(); i++) + if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack { - Nucleus::addSwitchCase(switchInst, i, callRetBlock[i]); + // FIXME: Encapsulate + UInt index = r.callStack[--r.stackIndex]; + + llvm::Value *value = Nucleus::createLoad(index.address); + llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); + + for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) + { + Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]); + } + } + else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination + { + Nucleus::createBr(callRetBlock[currentLabel][0]); + } + else // Function isn't called + { + Nucleus::createBr(unreachableBlock); } Nucleus::setInsertBlock(unreachableBlock); @@ -5103,30 +5381,38 @@ } } - void PixelRoutine::writeDestination(Registers &r, Color4i &d, 
const Dst &dst) + void PixelRoutine::LEAVE(Registers &r) + { + r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex]; + + // FIXME: Return from function if all instances left + // FIXME: Use enableLeave in other control-flow constructs + } + + void PixelRoutine::writeDestination(Registers &r, Vector4i &d, const Dst &dst) { switch(dst.type) { - case Dst::PARAMETER_TEMP: + case Shader::PARAMETER_TEMP: if(dst.mask & 0x1) r.ri[dst.index].x = d.x; if(dst.mask & 0x2) r.ri[dst.index].y = d.y; if(dst.mask & 0x4) r.ri[dst.index].z = d.z; if(dst.mask & 0x8) r.ri[dst.index].w = d.w; break; - case Dst::PARAMETER_INPUT: + case Shader::PARAMETER_INPUT: if(dst.mask & 0x1) r.vi[dst.index].x = d.x; if(dst.mask & 0x2) r.vi[dst.index].y = d.y; if(dst.mask & 0x4) r.vi[dst.index].z = d.z; if(dst.mask & 0x8) r.vi[dst.index].w = d.w; break; - case Dst::PARAMETER_CONST: ASSERT(false); break; - case Dst::PARAMETER_TEXTURE: + case Shader::PARAMETER_CONST: ASSERT(false); break; + case Shader::PARAMETER_TEXTURE: if(dst.mask & 0x1) r.ti[dst.index].x = d.x; if(dst.mask & 0x2) r.ti[dst.index].y = d.y; if(dst.mask & 0x4) r.ti[dst.index].z = d.z; if(dst.mask & 0x8) r.ti[dst.index].w = d.w; break; - case Dst::PARAMETER_COLOROUT: + case Shader::PARAMETER_COLOROUT: if(dst.mask & 0x1) r.vi[dst.index].x = d.x; if(dst.mask & 0x2) r.vi[dst.index].y = d.y; if(dst.mask & 0x4) r.vi[dst.index].z = d.z; @@ -5137,29 +5423,29 @@ } } - Color4i PixelRoutine::regi(Registers &r, const Src &src) + Vector4i PixelRoutine::regi(Registers &r, const Src &src) { - Color4i *reg; + Vector4i *reg; int i = src.index; - Color4i c; + Vector4i c; - if(src.type == ShaderParameter::PARAMETER_CONST) + if(src.type == Shader::PARAMETER_CONST) { - c.r = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][0])); - c.g = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][1])); - c.b = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][2])); - c.a = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][3])); + c.x = 
*Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][0])); + c.y = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][1])); + c.z = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][2])); + c.w = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][3])); } switch(src.type) { - case Src::PARAMETER_TEMP: reg = &r.ri[i]; break; - case Src::PARAMETER_INPUT: reg = &r.vi[i]; break; - case Src::PARAMETER_CONST: reg = &c; break; - case Src::PARAMETER_TEXTURE: reg = &r.ti[i]; break; - case Src::PARAMETER_VOID: return r.ri[0]; // Dummy - case Src::PARAMETER_FLOATLITERAL: return r.ri[0]; // Dummy + case Shader::PARAMETER_TEMP: reg = &r.ri[i]; break; + case Shader::PARAMETER_INPUT: reg = &r.vi[i]; break; + case Shader::PARAMETER_CONST: reg = &c; break; + case Shader::PARAMETER_TEXTURE: reg = &r.ti[i]; break; + case Shader::PARAMETER_VOID: return r.ri[0]; // Dummy + case Shader::PARAMETER_FLOAT4LITERAL: return r.ri[0]; // Dummy default: ASSERT(false); } @@ -5169,180 +5455,177 @@ Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3]; Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3]; - Color4i mod; + Vector4i mod; switch(src.modifier) { - case Src::MODIFIER_NONE: - mod.r = x; - mod.g = y; - mod.b = z; - mod.a = w; + case Shader::MODIFIER_NONE: + mod.x = x; + mod.y = y; + mod.z = z; + mod.w = w; break; - case Src::MODIFIER_BIAS: - mod.r = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.g = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.b = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.a = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + case Shader::MODIFIER_BIAS: + mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); break; - case Src::MODIFIER_BIAS_NEGATE: - mod.r = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x); - mod.g = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y); 
- mod.b = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z); - mod.a = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w); + case Shader::MODIFIER_BIAS_NEGATE: + mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x); + mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y); + mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z); + mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w); break; - case Src::MODIFIER_COMPLEMENT: - mod.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), x); - mod.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), y); - mod.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), z); - mod.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), w); + case Shader::MODIFIER_COMPLEMENT: + mod.x = SubSat(Short4(0x1000), x); + mod.y = SubSat(Short4(0x1000), y); + mod.z = SubSat(Short4(0x1000), z); + mod.w = SubSat(Short4(0x1000), w); break; - case Src::MODIFIER_NEGATE: - mod.r = -x; - mod.g = -y; - mod.b = -z; - mod.a = -w; + case Shader::MODIFIER_NEGATE: + mod.x = -x; + mod.y = -y; + mod.z = -z; + mod.w = -w; break; - case Src::MODIFIER_X2: - mod.r = AddSat(x, x); - mod.g = AddSat(y, y); - mod.b = AddSat(z, z); - mod.a = AddSat(w, w); + case Shader::MODIFIER_X2: + mod.x = AddSat(x, x); + mod.y = AddSat(y, y); + mod.z = AddSat(z, z); + mod.w = AddSat(w, w); break; - case Src::MODIFIER_X2_NEGATE: - mod.r = -AddSat(x, x); - mod.g = -AddSat(y, y); - mod.b = -AddSat(z, z); - mod.a = -AddSat(w, w); + case Shader::MODIFIER_X2_NEGATE: + mod.x = -AddSat(x, x); + mod.y = -AddSat(y, y); + mod.z = -AddSat(z, z); + mod.w = -AddSat(w, w); break; - case Src::MODIFIER_SIGN: - mod.r = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.g = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.b = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.a = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); - mod.r = AddSat(mod.r, mod.r); - mod.g = AddSat(mod.g, mod.g); - mod.b = AddSat(mod.b, mod.b); - mod.a = AddSat(mod.a, mod.a); + case 
Shader::MODIFIER_SIGN: + mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800)); + mod.x = AddSat(mod.x, mod.x); + mod.y = AddSat(mod.y, mod.y); + mod.z = AddSat(mod.z, mod.z); + mod.w = AddSat(mod.w, mod.w); break; - case Src::MODIFIER_SIGN_NEGATE: - mod.r = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x); - mod.g = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y); - mod.b = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z); - mod.a = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w); - mod.r = AddSat(mod.r, mod.r); - mod.g = AddSat(mod.g, mod.g); - mod.b = AddSat(mod.b, mod.b); - mod.a = AddSat(mod.a, mod.a); + case Shader::MODIFIER_SIGN_NEGATE: + mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x); + mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y); + mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z); + mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w); + mod.x = AddSat(mod.x, mod.x); + mod.y = AddSat(mod.y, mod.y); + mod.z = AddSat(mod.z, mod.z); + mod.w = AddSat(mod.w, mod.w); break; - case Src::MODIFIER_DZ: - mod.r = x; - mod.g = y; - mod.b = z; - mod.a = w; + case Shader::MODIFIER_DZ: + mod.x = x; + mod.y = y; + mod.z = z; + mod.w = w; // Projection performed by texture sampler break; - case Src::MODIFIER_DW: - mod.r = x; - mod.g = y; - mod.b = z; - mod.a = w; + case Shader::MODIFIER_DW: + mod.x = x; + mod.y = y; + mod.z = z; + mod.w = w; // Projection performed by texture sampler break; default: ASSERT(false); } - if(src.type == ShaderParameter::PARAMETER_CONST && (src.modifier == Src::MODIFIER_X2 || src.modifier == Src::MODIFIER_X2_NEGATE)) + if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE)) { - mod.r = Min(mod.r, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.r = 
Max(mod.r, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); - mod.g = Min(mod.g, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.g = Max(mod.g, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); - mod.b = Min(mod.b, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.b = Max(mod.b, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); - mod.a = Min(mod.a, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.a = Max(mod.a, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); + mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); + mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); + mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); + mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000)); } return mod; } - Color4f PixelRoutine::reg(Registers &r, const Src &src, int offset) + Vector4f PixelRoutine::reg(Registers &r, const Src &src, int offset) { - Color4f reg; + Vector4f reg; int i = src.index + offset; switch(src.type) { - case Src::PARAMETER_TEMP: reg = r.rf[i]; break; - case Src::PARAMETER_INPUT: + case Shader::PARAMETER_TEMP: + if(src.rel.type == Shader::PARAMETER_VOID) { - if(!src.relative) + reg = r.rf[i]; + } + else + { + Int a = relativeAddress(r, src); + + reg = r.rf[i + a]; + } + break; + case Shader::PARAMETER_INPUT: + { + if(src.rel.type == Shader::PARAMETER_VOID) // Not relative { - reg.x = r.vx[i]; - reg.y = r.vy[i]; - reg.z = r.vz[i]; - reg.w = r.vw[i]; + reg = r.vf[i]; } - else if(src.relativeType == Src::PARAMETER_LOOP) + else if(src.rel.type == Shader::PARAMETER_LOOP) { Int aL = r.aL[r.loopDepth]; - reg.x = r.vx[i + aL]; - reg.y = r.vy[i + aL]; - reg.z = r.vz[i + aL]; - reg.w = r.vw[i + aL]; + reg = r.vf[i + aL]; } - else ASSERT(false); - } - break; - case Src::PARAMETER_CONST: - { - reg.r = reg.g = reg.b = reg.a = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i])); - - reg.r = reg.r.xxxx; - reg.g = 
reg.g.yyyy; - reg.b = reg.b.zzzz; - reg.a = reg.a.wwww; - - if(localShaderConstants) // Constant may be known at compile time + else { - for(int j = 0; j < pixelShader->getLength(); j++) - { - const ShaderInstruction &instruction = *pixelShader->getInstruction(j); - - if(instruction.getOpcode() == ShaderOperation::OPCODE_DEF) - { - if(instruction.getDestinationParameter().index == i) - { - reg.r = Float4(instruction.getSourceParameter(0).value); - reg.g = Float4(instruction.getSourceParameter(1).value); - reg.b = Float4(instruction.getSourceParameter(2).value); - reg.a = Float4(instruction.getSourceParameter(3).value); - - break; - } - } - } + Int a = relativeAddress(r, src); + + reg = r.vf[i + a]; } } break; - case Src::PARAMETER_TEXTURE: - { - reg.x = r.vx[2 + i]; - reg.y = r.vy[2 + i]; - reg.z = r.vz[2 + i]; - reg.w = r.vw[2 + i]; - } + case Shader::PARAMETER_CONST: + reg = readConstant(r, src, offset); break; - case Src::PARAMETER_MISCTYPE: + case Shader::PARAMETER_TEXTURE: + reg = r.vf[2 + i]; + break; + case Shader::PARAMETER_MISCTYPE: if(src.index == 0) reg = r.vPos; if(src.index == 1) reg = r.vFace; break; - case Src::PARAMETER_SAMPLER: return r.rf[0]; // Dummy - case Src::PARAMETER_PREDICATE: return r.rf[0]; // Dummy - case Src::PARAMETER_VOID: return r.rf[0]; // Dummy - case Src::PARAMETER_FLOATLITERAL: return r.rf[0]; // Dummy - case Src::PARAMETER_CONSTINT: return r.rf[0]; // Dummy - case Src::PARAMETER_CONSTBOOL: return r.rf[0]; // Dummy - case Src::PARAMETER_LOOP: return r.rf[0]; // Dummy + case Shader::PARAMETER_SAMPLER: + if(src.rel.type == Shader::PARAMETER_VOID) + { + reg.x = As<Float4>(Int4(i)); + } + else if(src.rel.type == Shader::PARAMETER_TEMP) + { + reg.x = As<Float4>(Int4(i) + RoundInt(r.rf[src.rel.index].x)); + } + return reg; + case Shader::PARAMETER_PREDICATE: return reg; // Dummy + case Shader::PARAMETER_VOID: return reg; // Dummy + case Shader::PARAMETER_FLOAT4LITERAL: + reg.x = Float4(src.value[0]); + reg.y = Float4(src.value[1]); + 
reg.z = Float4(src.value[2]); + reg.w = Float4(src.value[3]); + break; + case Shader::PARAMETER_CONSTINT: return reg; // Dummy + case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy + case Shader::PARAMETER_LOOP: return reg; // Dummy + case Shader::PARAMETER_COLOROUT: + reg = r.oC[i]; + break; + case Shader::PARAMETER_DEPTHOUT: + reg.x = r.oDepth; + break; default: ASSERT(false); } @@ -5352,29 +5635,29 @@ Float4 &z = reg[(src.swizzle >> 4) & 0x3]; Float4 &w = reg[(src.swizzle >> 6) & 0x3]; - Color4f mod; + Vector4f mod; switch(src.modifier) { - case Src::MODIFIER_NONE: + case Shader::MODIFIER_NONE: mod.x = x; mod.y = y; mod.z = z; mod.w = w; break; - case Src::MODIFIER_NEGATE: + case Shader::MODIFIER_NEGATE: mod.x = -x; mod.y = -y; mod.z = -z; mod.w = -w; break; - case Src::MODIFIER_ABS: + case Shader::MODIFIER_ABS: mod.x = Abs(x); mod.y = Abs(y); mod.z = Abs(z); mod.w = Abs(w); break; - case Src::MODIFIER_ABS_NEGATE: + case Shader::MODIFIER_ABS_NEGATE: mod.x = -Abs(x); mod.y = -Abs(y); mod.z = -Abs(z); @@ -5387,13 +5670,134 @@ return mod; } - bool PixelRoutine::colorUsed() + Vector4f PixelRoutine::readConstant(Registers &r, const Src &src, int offset) { - return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsTexkill; + Vector4f c; + + int i = src.index + offset; + + if(src.rel.type == Shader::PARAMETER_VOID) // Not relative + { + c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i])); + + c.x = c.x.xxxx; + c.y = c.y.yyyy; + c.z = c.z.zzzz; + c.w = c.w.wwww; + + if(localShaderConstants) // Constant may be known at compile time + { + for(int j = 0; j < shader->getLength(); j++) + { + const Shader::Instruction &instruction = *shader->getInstruction(j); + + if(instruction.opcode == Shader::OPCODE_DEF) + { + if(instruction.dst.index == i) + { + c.x = Float4(instruction.src[0].value[0]); + c.y = Float4(instruction.src[0].value[1]); + c.z = Float4(instruction.src[0].value[2]); + c.w = Float4(instruction.src[0].value[3]); + + 
break; + } + } + } + } + } + else if(src.rel.type == Shader::PARAMETER_LOOP) + { + Int loopCounter = r.aL[r.loopDepth]; + + c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]) + loopCounter * 16); + + c.x = c.x.xxxx; + c.y = c.y.yyyy; + c.z = c.z.zzzz; + c.w = c.w.wwww; + } + else + { + Int a = relativeAddress(r, src); + + c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]) + a * 16); + + c.x = c.x.xxxx; + c.y = c.y.yyyy; + c.z = c.z.zzzz; + c.w = c.w.wwww; + } + + return c; } - unsigned short PixelRoutine::pixelShaderVersion() const + Int PixelRoutine::relativeAddress(Registers &r, const Shader::Parameter &var) /* Returns the register offset for a relatively addressed parameter: element 0 of the addressing register selected by var.rel.type (TEMP, INPUT, OUTPUT/oC or CONST), rounded to an integer and multiplied by var.rel.scale. Asserts and yields 0 for any other type. */ { - return pixelShader ? pixelShader->getVersion() : 0x0000; + ASSERT(var.rel.deterministic); + + if(var.rel.type == Shader::PARAMETER_TEMP) + { + return RoundInt(Extract(r.rf[var.rel.index].x, 0)) * var.rel.scale; + } + else if(var.rel.type == Shader::PARAMETER_INPUT) + { + return RoundInt(Extract(r.vf[var.rel.index].x, 0)) * var.rel.scale; + } + else if(var.rel.type == Shader::PARAMETER_OUTPUT) + { + return RoundInt(Extract(r.oC[var.rel.index].x, 0)) * var.rel.scale; + } + else if(var.rel.type == Shader::PARAMETER_CONST) + { + RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[var.rel.index])); /* read from ps.c, the same constant array readConstant() uses (was vs.c) */ + + return RoundInt(Extract(c, 0)) * var.rel.scale; + } + else ASSERT(false); + + return 0; + } + + Int4 PixelRoutine::enableMask(Registers &r, const Shader::Instruction *instruction) + { + Int4 enable = instruction->analysisBranch ? 
Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF); + + if(shader->containsBreakInstruction() && !whileTest && instruction->analysisBreak) + { + enable &= r.enableBreak; + } + + if(shader->containsContinueInstruction() && !whileTest && instruction->analysisContinue) + { + enable &= r.enableContinue; + } + + if(shader->containsLeaveInstruction() && instruction->analysisLeave) + { + enable &= r.enableLeave; + } + + return enable; + } + + bool PixelRoutine::colorUsed() + { + return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill; + } + + unsigned short PixelRoutine::shaderVersion() const + { + return shader ? shader->getVersion() : 0x0000; + } + + bool PixelRoutine::interpolateZ() const + { + return state.depthTestActive || state.pixelFogActive() || (shader && shader->vPosDeclared && fullPixelPositionRegister); + } + + bool PixelRoutine::interpolateW() const + { + return state.perspective || (shader && shader->vPosDeclared && fullPixelPositionRegister); } }
diff --git a/src/Shader/PixelRoutine.hpp b/src/Shader/PixelRoutine.hpp index c1070ff..29ea75a 100644 --- a/src/Shader/PixelRoutine.hpp +++ b/src/Shader/PixelRoutine.hpp
@@ -1,304 +1,327 @@ -// SwiftShader Software Renderer -// -// Copyright(c) 2005-2011 TransGaming Inc. -// -// All rights reserved. No part of this software may be copied, distributed, transmitted, -// transcribed, stored in a retrieval system, translated into any human or computer -// language by any means, or disclosed to third parties without the explicit written -// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express -// or implied, including but not limited to any patent rights, are granted to you. -// - -#ifndef sw_PixelRoutine_hpp -#define sw_PixelRoutine_hpp - -#include "Rasterizer.hpp" -#include "ShaderCore.hpp" - -#include "Types.hpp" - -namespace sw -{ - extern bool forceClearRegisters; - - class PixelShader; - class SamplerCore; - - class PixelRoutine : public Rasterizer, public ShaderCore - { - friend PixelProcessor; // FIXME - - public: - PixelRoutine(const PixelProcessor::State &state, const PixelShader *pixelShader); - - ~PixelRoutine(); - - protected: - struct Registers - { - Registers() : current(ri[0]), diffuse(vi[0]), specular(vi[1]), callStack(4), aL(4), increment(4), iteration(4), enableStack(1 + 24), vx(10), vy(10), vz(10), vw(10) - { - if(forceClearRegisters) - { - for(int i = 0; i < 10; i++) - { - vx[i] = Float4(0, 0, 0, 0); - vy[i] = Float4(0, 0, 0, 0); - vz[i] = Float4(0, 0, 0, 0); - vw[i] = Float4(0, 0, 0, 0); - } - - for(int i = 0; i < 4; i++) - { - oC[i].r = Float4(0.0f); - oC[i].g = Float4(0.0f); - oC[i].b = Float4(0.0f); - oC[i].a = Float4(0.0f); - } - } - - loopDepth = -1; - enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - - occlusion = 0; - - #if PERF_PROFILE - for(int i = 0; i < PERF_TIMERS; i++) - { - cycles[i] = 0; - } - #endif - } - - Pointer<Byte> constants; - - Pointer<Byte> primitive; - Int cluster; - Pointer<Byte> data; - - Float4 z[4]; - Float4 rhw; - - Float4 Dz[4]; - Float4 Dw; - Float4 
Dv[10][4]; - Float4 Df; - - Color4i &current; - Color4i &diffuse; - Color4i &specular; - - Color4i ri[6]; - Color4i vi[2]; - Color4i ti[6]; - - Color4f rf[32]; - Array<Float4> vx; - Array<Float4> vy; - Array<Float4> vz; - Array<Float4> vw; - - Color4f vPos; - Color4f vFace; - - Color4f oC[4]; - Float4 oDepth; - - Color4f p0; - Array<Int> aL; - - Array<Int> increment; - Array<Int> iteration; - - Int loopDepth; - Int stackIndex; // FIXME: Inc/decrement callStack - Array<UInt> callStack; - - Int enableIndex; - Array<Int4> enableStack; - Int4 enableBreak; - - // bem(l) offsets and luminance - Float4 du; - Float4 dv; - Short4 L; - - // texm3x3 temporaries - Float4 u_; // FIXME - Float4 v_; // FIXME - Float4 w_; // FIXME - Float4 U; // FIXME - Float4 V; // FIXME - Float4 W; // FIXME - - UInt occlusion; - - #if PERF_PROFILE - Long cycles[PERF_TIMERS]; - #endif - }; - - void quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y); - - Float4 interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective); - Float4 interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective); - void stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask); - void stencilTest(Registers &r, Byte8 &value, Context::StencilCompareMode stencilCompareMode, bool CCW); - void stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, Context::StencilOperation stencilPassOperation, Context::StencilOperation stencilZFailOperation, Context::StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask); - void stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, Context::StencilOperation operation, bool CCW); - Bool depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask); - void blendTexture(Registers &r, Color4i &current, Color4i &temp, 
Color4i &texture, int stage); - void alphaTest(Registers &r, Int &aMask, Short4 &alpha); - void alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha); - Bool alphaTest(Registers &r, Int cMask[4], Color4i &current); - Bool alphaTest(Registers &r, Int cMask[4], Color4f &c0); - void fogBlend(Registers &r, Color4i &current, Float4 &fog, Float4 &z, Float4 &rhw); - void fogBlend(Registers &r, Color4f &c0, Float4 &fog, Float4 &z, Float4 &rhw); - void pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw); - void specularPixel(Color4i &current, Color4i &specular); - - void sampleTexture(Registers &r, Color4i &c, int coordinates, int sampler, bool project = false); - void sampleTexture(Registers &r, Color4i &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project = false, bool bias = false, bool fixed12 = true); - void sampleTexture(Registers &r, Color4i &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool project = false, bool bias = false, bool fixed12 = true, bool gradients = false, bool lodProvided = false); - void sampleTexture(Registers &r, Color4f &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false); - - // Raster operations - void clampColor(Color4f oC[4]); - void rasterOperation(Color4i &current, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4]); - void rasterOperation(Color4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]); - void blendFactor(Registers &r, const Color4i &blendFactor, const Color4i &current, const Color4i &pixel, Context::BlendFactor blendFactorActive); - void blendFactorAlpha(Registers &r, const Color4i &blendFactor, const Color4i &current, const Color4i &pixel, Context::BlendFactor blendFactorAlphaActive); - void alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, 
Color4i &current, Int &x); - void writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &i, Color4i &current, Int &sMask, Int &zMask, Int &cMask); - void blendFactor(Registers &r, const Color4f &blendFactor, const Color4f &oC, const Color4f &pixel, Context::BlendFactor blendFactorActive); - void blendFactorAlpha(Registers &r, const Color4f &blendFactor, const Color4f &oC, const Color4f &pixel, Context::BlendFactor blendFactorAlphaActive); - void alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Color4f &oC, Int &x); - void writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &i, Color4f &oC, Int &sMask, Int &zMask, Int &cMask); - void writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask); - void writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask); - - void ps_1_x(Registers &r, Int cMask[4]); - void ps_2_x(Registers &r, Int cMask[4]); - - Short4 convertFixed12(Float4 &cf); - void convertFixed12(Color4i &ci, Color4f &cf); - Float4 convertSigned12(Short4 &ci); - void convertSigned12(Color4f &cf, Color4i &ci); - Float4 convertUnsigned16(UShort4 ci); - UShort4 convertFixed16(Float4 &cf, bool saturate = true); - void convertFixed16(Color4i &ci, Color4f &cf, bool saturate = true); - void sRGBtoLinear16_16(Registers &r, Color4i &c); - void sRGBtoLinear12_16(Registers &r, Color4i &c); - void linearToSRGB16_16(Registers &r, Color4i &c); - void linearToSRGB12_16(Registers &r, Color4i &c); - Float4 sRGBtoLinear(const Float4 &x); - Float4 linearToSRGB(const Float4 &x); - - typedef Shader::Instruction::DestinationParameter Dst; - typedef Shader::Instruction::SourceParameter Src; - typedef Shader::Instruction::Operation Op; - typedef Shader::Instruction::Operation::Control Control; - - // ps_1_x instructions - void MOV(Color4i &dst, Color4i &src0); - void ADD(Color4i &dst, Color4i &src0, Color4i &src1); - void SUB(Color4i &dst, Color4i &src0, Color4i &src1); - void MAD(Color4i 
&dst, Color4i &src0, Color4i &src1, Color4i &src2); - void MUL(Color4i &dst, Color4i &src0, Color4i &src1); - void DP3(Color4i &dst, Color4i &src0, Color4i &src1); - void DP4(Color4i &dst, Color4i &src0, Color4i &src1); - void LRP(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2); - void TEXCOORD(Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate); - void TEXCRD(Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project); - void TEXDP3(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src); - void TEXDP3TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0); - void TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s); - void TEXKILL(Int cMask[4], Color4i &dst); - void TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, bool project); - void TEXLD(Registers &r, Color4i &dst, Color4i &src, int stage, bool project); - void TEXBEM(Registers &r, Color4i &dst, Color4i &src, Float4 &u, Float4 &v, Float4 &s, int stage); - void TEXBEML(Registers &r, Color4i &dst, Color4i &src, Float4 &u, Float4 &v, Float4 &s, int stage); - void TEXREG2AR(Registers &r, Color4i &dst, Color4i &src0, int stage); - void TEXREG2GB(Registers &r, Color4i &dst, Color4i &src0, int stage); - void TEXREG2RGB(Registers &r, Color4i &dst, Color4i &src0, int stage); - void TEXM3X2DEPTH(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src, bool signedScaling); - void TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, int component, bool signedScaling); - void TEXM3X2TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, bool signedScaling); - void TEXM3X3(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, bool signedScaling); - void TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, int component, bool signedScaling); - void TEXM3X3SPEC(Registers &r, Color4i &dst, 
Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, Color4i &src1); - void TEXM3X3TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, bool singedScaling); - void TEXM3X3VSPEC(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0); - void TEXDEPTH(Registers &r); - void CND(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2); - void CMP(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2); - void BEM(Registers &r, Color4i &dst, Color4i &src0, Color4i &src1, int stage); - - // ps_2_x instructions - void M3X2(Registers &r, Color4f &dst, Color4f &src0, const Src &src1); - void M3X3(Registers &r, Color4f &dst, Color4f &src0, const Src &src1); - void M3X4(Registers &r, Color4f &dst, Color4f &src0, const Src &src1); - void M4X3(Registers &r, Color4f &dst, Color4f &src0, const Src &src1); - void M4X4(Registers &r, Color4f &dst, Color4f &src0, const Src &src1); - void TEXLD(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, bool project, bool bias); - void TEXLDD(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, Color4f &src2, Color4f &src3, bool project, bool bias); - void TEXLDL(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, bool project, bool bias); - void TEXKILL(Int cMask[4], Color4f &src, unsigned char mask); - void DSX(Color4f &dst, Color4f &src); - void DSY(Color4f &dst, Color4f &src); - void BREAK(Registers &r); - void BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control); - void BREAKP(Registers &r, const Src &predicateRegister); - void CALL(Registers &r, int labelIndex); - void CALLNZ(Registers &r, int labelIndex, const Src &src); - void CALLNZb(Registers &r, int labelIndex, const Src &boolRegister); - void CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister); - void ELSE(Registers &r); - void ENDIF(Registers &r); - void ENDLOOP(Registers &r); - void ENDREP(Registers &r); - void IF(Registers &r, const Src &src); - void 
IFb(Registers &r, const Src &boolRegister); - void IFp(Registers &r, const Src &predicateRegister); - void IFC(Registers &r, Color4f &src0, Color4f &src1, Control); - void LABEL(int labelIndex); - void LOOP(Registers &r, const Src &integerRegister); - void REP(Registers &r, const Src &integerRegister); - void RET(Registers &r); - - void readConstant(Registers &r, int index); - - void writeDestination(Registers &r, Color4i &d, const Dst &dst); - Color4i regi(Registers &r, const Src &src); - Color4f reg(Registers &r, const Src &src, int offset = 0); - - bool colorUsed(); - unsigned short pixelShaderVersion() const; - - private: - SamplerCore *sampler[16]; - - bool perturbate; - bool luminance; - bool previousScaling; - - bool returns; - int ifDepth; - int loopRepDepth; - int breakDepth; - - // FIXME: Get rid of llvm:: - llvm::BasicBlock *ifFalseBlock[24 + 24]; - llvm::BasicBlock *loopRepTestBlock[4]; - llvm::BasicBlock *loopRepEndBlock[4]; - llvm::BasicBlock *labelBlock[2048]; - std::vector<llvm::BasicBlock*> callRetBlock; - llvm::BasicBlock *returnBlock; - bool isConditionalIf[24 + 24]; - - const PixelShader *const pixelShader; - }; -} - -#endif // sw_PixelRoutine_hpp +// SwiftShader Software Renderer +// +// Copyright(c) 2005-2012 TransGaming Inc. +// +// All rights reserved. No part of this software may be copied, distributed, transmitted, +// transcribed, stored in a retrieval system, translated into any human or computer +// language by any means, or disclosed to third parties without the explicit written +// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express +// or implied, including but not limited to any patent rights, are granted to you. 
+// + +#ifndef sw_PixelRoutine_hpp +#define sw_PixelRoutine_hpp + +#include "Rasterizer.hpp" +#include "ShaderCore.hpp" +#include "PixelShader.hpp" + +#include "Types.hpp" + +namespace sw +{ + extern bool forceClearRegisters; + + class PixelShader; + class SamplerCore; + + class PixelRoutine : public Rasterizer, public ShaderCore + { + friend PixelProcessor; // FIXME + + public: + PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader); + + ~PixelRoutine(); + + protected: + struct Registers + { + Registers(const PixelShader *shader) : + current(ri[0]), diffuse(vi[0]), specular(vi[1]), + rf(shader && shader->dynamicallyIndexedTemporaries), + vf(shader && shader->dynamicallyIndexedInput) + { + if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters) + { + for(int i = 0; i < 10; i++) + { + vf[i].x = Float4(0.0f); + vf[i].y = Float4(0.0f); + vf[i].z = Float4(0.0f); + vf[i].w = Float4(0.0f); + } + } + + loopDepth = -1; + enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + + if(shader && shader->containsBreakInstruction()) + { + enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + if(shader && shader->containsContinueInstruction()) + { + enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + if(shader && shader->containsLeaveInstruction()) + { + enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + occlusion = 0; + + #if PERF_PROFILE + for(int i = 0; i < PERF_TIMERS; i++) + { + cycles[i] = 0; + } + #endif + } + + Pointer<Byte> constants; + + Pointer<Byte> primitive; + Int cluster; + Pointer<Byte> data; + + Float4 z[4]; + Float4 w; + Float4 rhw; + + Float4 Dz[4]; + Float4 Dw; + Float4 Dv[10][4]; + Float4 Df; + + Vector4i &current; + Vector4i &diffuse; + Vector4i &specular; + + Vector4i ri[6]; + Vector4i vi[2]; + Vector4i ti[6]; + + RegisterArray<4096> rf; + RegisterArray<10> vf; + + Vector4f vPos; + Vector4f vFace; + + Vector4f oC[4]; + Float4 oDepth; + + 
Vector4f p0; + Array<Int, 4> aL; + + Array<Int, 4> increment; + Array<Int, 4> iteration; + + Int loopDepth; + Int stackIndex; // FIXME: Inc/decrement callStack + Array<UInt, 4> callStack; + + Int enableIndex; + Array<Int4, 1 + 24> enableStack; + Int4 enableBreak; + Int4 enableContinue; + Int4 enableLeave; + + // bem(l) offsets and luminance + Float4 du; + Float4 dv; + Short4 L; + + // texm3x3 temporaries + Float4 u_; // FIXME + Float4 v_; // FIXME + Float4 w_; // FIXME + Float4 U; // FIXME + Float4 V; // FIXME + Float4 W; // FIXME + + UInt occlusion; + + #if PERF_PROFILE + Long cycles[PERF_TIMERS]; + #endif + }; + + typedef Shader::DestinationParameter Dst; + typedef Shader::SourceParameter Src; + typedef Shader::Control Control; + + void quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y); + + Float4 interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective); + Float4 interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective); + void stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask); + void stencilTest(Registers &r, Byte8 &value, Context::StencilCompareMode stencilCompareMode, bool CCW); + void stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, Context::StencilOperation stencilPassOperation, Context::StencilOperation stencilZFailOperation, Context::StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask); + void stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, Context::StencilOperation operation, bool CCW); + Bool depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask); + void blendTexture(Registers &r, Vector4i &current, Vector4i &temp, Vector4i &texture, int stage); + void alphaTest(Registers &r, Int &aMask, Short4 &alpha); + void alphaToCoverage(Registers &r, 
Int cMask[4], Float4 &alpha); + Bool alphaTest(Registers &r, Int cMask[4], Vector4i &current); + Bool alphaTest(Registers &r, Int cMask[4], Vector4f &c0); + void fogBlend(Registers &r, Vector4i &current, Float4 &fog, Float4 &z, Float4 &rhw); + void fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw); + void pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw); + void specularPixel(Vector4i &current, Vector4i &specular); + + void sampleTexture(Registers &r, Vector4i &c, int coordinates, int sampler, bool project = false); + void sampleTexture(Registers &r, Vector4i &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project = false, bool bias = false, bool fixed12 = true); + void sampleTexture(Registers &r, Vector4i &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool fixed12 = true, bool gradients = false, bool lodProvided = false); + void sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false); + void sampleTexture(Registers &r, Vector4f &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false); + + // Raster operations + void clampColor(Vector4f oC[4]); + void rasterOperation(Vector4i &current, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4]); + void rasterOperation(Vector4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]); + void blendFactor(Registers &r, const Vector4i &blendFactor, const Vector4i &current, const Vector4i &pixel, Context::BlendFactor blendFactorActive); + void blendFactorAlpha(Registers &r, const Vector4i &blendFactor, const Vector4i &current, const 
Vector4i &pixel, Context::BlendFactor blendFactorAlphaActive); + void alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4i &current, Int &x); + void writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &i, Vector4i &current, Int &sMask, Int &zMask, Int &cMask); + void blendFactor(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, Context::BlendFactor blendFactorActive); + void blendFactorAlpha(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, Context::BlendFactor blendFactorAlphaActive); + void alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x); + void writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &i, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask); + void writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask); + void writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask); + + void ps_1_x(Registers &r, Int cMask[4]); + void ps_2_x(Registers &r, Int cMask[4]); + + Short4 convertFixed12(RValue<Float4> cf); + void convertFixed12(Vector4i &ci, Vector4f &cf); + Float4 convertSigned12(Short4 &ci); + void convertSigned12(Vector4f &cf, Vector4i &ci); + Float4 convertUnsigned16(UShort4 ci); + UShort4 convertFixed16(Float4 &cf, bool saturate = true); + void convertFixed16(Vector4i &ci, Vector4f &cf, bool saturate = true); + void sRGBtoLinear16_16(Registers &r, Vector4i &c); + void sRGBtoLinear12_16(Registers &r, Vector4i &c); + void linearToSRGB16_16(Registers &r, Vector4i &c); + void linearToSRGB12_16(Registers &r, Vector4i &c); + Float4 sRGBtoLinear(const Float4 &x); + Float4 linearToSRGB(const Float4 &x); + + // ps_1_x instructions + void MOV(Vector4i &dst, Vector4i &src0); + void ADD(Vector4i &dst, Vector4i &src0, Vector4i &src1); + void SUB(Vector4i &dst, Vector4i &src0, Vector4i &src1); + void MAD(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i 
&src2); + void MUL(Vector4i &dst, Vector4i &src0, Vector4i &src1); + void DP3(Vector4i &dst, Vector4i &src0, Vector4i &src1); + void DP4(Vector4i &dst, Vector4i &src0, Vector4i &src1); + void LRP(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2); + void TEXCOORD(Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate); + void TEXCRD(Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project); + void TEXDP3(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, Vector4i &src); + void TEXDP3TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0); + void TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s); + void TEXKILL(Int cMask[4], Vector4i &dst); + void TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, bool project); + void TEXLD(Registers &r, Vector4i &dst, Vector4i &src, int stage, bool project); + void TEXBEM(Registers &r, Vector4i &dst, Vector4i &src, Float4 &u, Float4 &v, Float4 &s, int stage); + void TEXBEML(Registers &r, Vector4i &dst, Vector4i &src, Float4 &u, Float4 &v, Float4 &s, int stage); + void TEXREG2AR(Registers &r, Vector4i &dst, Vector4i &src0, int stage); + void TEXREG2GB(Registers &r, Vector4i &dst, Vector4i &src0, int stage); + void TEXREG2RGB(Registers &r, Vector4i &dst, Vector4i &src0, int stage); + void TEXM3X2DEPTH(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, Vector4i &src, bool signedScaling); + void TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4i &src0, int component, bool signedScaling); + void TEXM3X2TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0, bool signedScaling); + void TEXM3X3(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, Vector4i &src0, bool signedScaling); + void TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4i &src0, int component, bool signedScaling); + void TEXM3X3SPEC(Registers &r, Vector4i &dst, 
Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0, Vector4i &src1); + void TEXM3X3TEX(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0, bool singedScaling); + void TEXM3X3VSPEC(Registers &r, Vector4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4i &src0); + void TEXDEPTH(Registers &r); + void CND(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2); + void CMP(Vector4i &dst, Vector4i &src0, Vector4i &src1, Vector4i &src2); + void BEM(Registers &r, Vector4i &dst, Vector4i &src0, Vector4i &src1, int stage); + + // ps_2_x instructions + void M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1); + void M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1); + void M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1); + void M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1); + void M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1); + void TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias); + void TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project, bool bias); + void TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias); + void TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask); + void DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction); + void DFDX(Vector4f &dst, Vector4f &src); + void DFDY(Vector4f &dst, Vector4f &src); + void FWIDTH(Vector4f &dst, Vector4f &src); + void BREAK(Registers &r); + void BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control); + void BREAKP(Registers &r, const Src &predicateRegister); + void BREAK(Registers &r, Int4 &condition); + void CONTINUE(Registers &r); + void TEST(); + void CALL(Registers &r, int labelIndex, int callSiteIndex); + void CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src); + void 
CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister); + void CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister); + void ELSE(Registers &r); + void ENDIF(Registers &r); + void ENDLOOP(Registers &r); + void ENDREP(Registers &r); + void ENDWHILE(Registers &r); + void IF(Registers &r, const Src &src); + void IFb(Registers &r, const Src &boolRegister); + void IFp(Registers &r, const Src &predicateRegister); + void IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control); + void IF(Registers &r, Int4 &condition); + void LABEL(int labelIndex); + void LOOP(Registers &r, const Src &integerRegister); + void REP(Registers &r, const Src &integerRegister); + void WHILE(Registers &r, const Src &temporaryRegister); + void RET(Registers &r); + void LEAVE(Registers &r); + + void writeDestination(Registers &r, Vector4i &d, const Dst &dst); + Vector4i regi(Registers &r, const Src &src); + Vector4f reg(Registers &r, const Src &src, int offset = 0); + Vector4f readConstant(Registers &r, const Src &src, int offset = 0); + Int relativeAddress(Registers &r, const Shader::Parameter &var); + Int4 enableMask(Registers &r, const Shader::Instruction *instruction); + + bool colorUsed(); + unsigned short shaderVersion() const; + bool interpolateZ() const; + bool interpolateW() const; + + const PixelShader *const shader; + + private: + SamplerCore *sampler[16]; + + bool perturbate; + bool luminance; + bool previousScaling; + + int ifDepth; + int loopRepDepth; + int breakDepth; + int currentLabel; + bool whileTest; + + // FIXME: Get rid of llvm:: + llvm::BasicBlock *ifFalseBlock[24 + 24]; + llvm::BasicBlock *loopRepTestBlock[4]; + llvm::BasicBlock *loopRepEndBlock[4]; + llvm::BasicBlock *labelBlock[2048]; + std::vector<llvm::BasicBlock*> callRetBlock[2048]; + llvm::BasicBlock *returnBlock; + bool isConditionalIf[24 + 24]; + }; +} + +#endif // sw_PixelRoutine_hpp
diff --git a/src/Shader/PixelShader.cpp b/src/Shader/PixelShader.cpp index 7bf072a..3a43892 100644 --- a/src/Shader/PixelShader.cpp +++ b/src/Shader/PixelShader.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -15,50 +15,44 @@ namespace sw { - PixelShader::PixelShader(const unsigned long *token) : Shader(token) + PixelShader::PixelShader(const PixelShader *ps) : Shader() + { + version = 0x0300; + vPosDeclared = false; + vFaceDeclared = false; + centroid = false; + + if(ps) // Make a copy + { + for(int i = 0; i < ps->getLength(); i++) + { + append(new sw::Shader::Instruction(*ps->getInstruction(i))); + } + + memcpy(semantic, ps->semantic, sizeof(semantic)); + vPosDeclared = ps->vPosDeclared; + vFaceDeclared = ps->vFaceDeclared; + usedSamplers = ps->usedSamplers; + + analyze(); + } + } + + PixelShader::PixelShader(const unsigned long *token) : Shader() { parse(token); + + vPosDeclared = false; + vFaceDeclared = false; + centroid = false; + + analyze(); } PixelShader::~PixelShader() { } - void PixelShader::parse(const unsigned long *token) - { - minorVersion = (unsigned char)(token[0] & 0x000000FF); - majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); - shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); - - length = validate(token); - ASSERT(length != 0); - - instruction = new Shader::Instruction*[length]; - - for(int i = 0; i < length; i++) - { - while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token - { - int length = (*token & 0x7FFF0000) >> 16; - - token += length + 1; - } - - int length = size(*token); - - instruction[i] = new Instruction(token, length, majorVersion); - - token += length + 1; - } - - analyzeZOverride(); - analyzeTexkill(); - analyzeInterpolants(); - analyzeDirtyConstants(); - analyzeDynamicBranching(); - analyzeSamplers(); - } - int PixelShader::validate(const unsigned long *const token) { if(!token) @@ -88,12 
+82,12 @@ } else { - ShaderOpcode opcode = (ShaderOpcode)(token[i] & 0x0000FFFF); + Shader::Opcode opcode = (Shader::Opcode)(token[i] & 0x0000FFFF); switch(opcode) { - case ShaderOperation::OPCODE_RESERVED0: - case ShaderOperation::OPCODE_MOVA: + case Shader::OPCODE_RESERVED0: + case Shader::OPCODE_MOVA: return 0; // Unsupported operation default: instructionCount++; @@ -112,9 +106,9 @@ return zOverride; } - bool PixelShader::containsTexkill() const + bool PixelShader::containsKill() const { - return texkill; + return kill; } bool PixelShader::containsCentroid() const @@ -137,15 +131,27 @@ return semantic[2 + coordinate][component].active(); } + void PixelShader::analyze() + { + analyzeZOverride(); + analyzeKill(); + analyzeInterpolants(); + analyzeDirtyConstants(); + analyzeDynamicBranching(); + analyzeSamplers(); + analyzeCallSites(); + analyzeDynamicIndexing(); + } + void PixelShader::analyzeZOverride() { zOverride = false; - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - if(instruction[i]->getOpcode() == Instruction::Operation::OPCODE_TEXM3X2DEPTH || - instruction[i]->getOpcode() == Instruction::Operation::OPCODE_TEXDEPTH || - instruction[i]->getDestinationParameter().type == Instruction::DestinationParameter::PARAMETER_DEPTHOUT) + if(instruction[i]->opcode == Shader::OPCODE_TEXM3X2DEPTH || + instruction[i]->opcode == Shader::OPCODE_TEXDEPTH || + instruction[i]->dst.type == Shader::PARAMETER_DEPTHOUT) { zOverride = true; @@ -154,15 +160,16 @@ } } - void PixelShader::analyzeTexkill() + void PixelShader::analyzeKill() { - texkill = false; + kill = false; - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - if(instruction[i]->getOpcode() == Instruction::Operation::OPCODE_TEXKILL) + if(instruction[i]->opcode == Shader::OPCODE_TEXKILL || + instruction[i]->opcode == Shader::OPCODE_DISCARD) { - texkill = true; + kill = true; break; } @@ -171,76 +178,72 @@ void 
PixelShader::analyzeInterpolants() { - vPosDeclared = false; - vFaceDeclared = false; - centroid = false; - if(version < 0x0300) { // Set default mapping; disable unused interpolants below - semantic[0][0] = Semantic(ShaderOperation::USAGE_COLOR, 0); - semantic[0][1] = Semantic(ShaderOperation::USAGE_COLOR, 0); - semantic[0][2] = Semantic(ShaderOperation::USAGE_COLOR, 0); - semantic[0][3] = Semantic(ShaderOperation::USAGE_COLOR, 0); + semantic[0][0] = Semantic(Shader::USAGE_COLOR, 0); + semantic[0][1] = Semantic(Shader::USAGE_COLOR, 0); + semantic[0][2] = Semantic(Shader::USAGE_COLOR, 0); + semantic[0][3] = Semantic(Shader::USAGE_COLOR, 0); - semantic[1][0] = Semantic(ShaderOperation::USAGE_COLOR, 1); - semantic[1][1] = Semantic(ShaderOperation::USAGE_COLOR, 1); - semantic[1][2] = Semantic(ShaderOperation::USAGE_COLOR, 1); - semantic[1][3] = Semantic(ShaderOperation::USAGE_COLOR, 1); + semantic[1][0] = Semantic(Shader::USAGE_COLOR, 1); + semantic[1][1] = Semantic(Shader::USAGE_COLOR, 1); + semantic[1][2] = Semantic(Shader::USAGE_COLOR, 1); + semantic[1][3] = Semantic(Shader::USAGE_COLOR, 1); for(int i = 0; i < 8; i++) { - semantic[2 + i][0] = Semantic(ShaderOperation::USAGE_TEXCOORD, i); - semantic[2 + i][1] = Semantic(ShaderOperation::USAGE_TEXCOORD, i); - semantic[2 + i][2] = Semantic(ShaderOperation::USAGE_TEXCOORD, i); - semantic[2 + i][3] = Semantic(ShaderOperation::USAGE_TEXCOORD, i); + semantic[2 + i][0] = Semantic(Shader::USAGE_TEXCOORD, i); + semantic[2 + i][1] = Semantic(Shader::USAGE_TEXCOORD, i); + semantic[2 + i][2] = Semantic(Shader::USAGE_TEXCOORD, i); + semantic[2 + i][3] = Semantic(Shader::USAGE_TEXCOORD, i); } - Instruction::Operation::SamplerType samplerType[16]; + Shader::SamplerType samplerType[16]; for(int i = 0; i < 16; i++) { - samplerType[i] = Instruction::Operation::SAMPLER_UNKNOWN; + samplerType[i] = Shader::SAMPLER_UNKNOWN; } - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - 
if(instruction[i]->getDestinationParameter().type == Instruction::SourceParameter::PARAMETER_SAMPLER) + if(instruction[i]->dst.type == Shader::PARAMETER_SAMPLER) { - int sampler = instruction[i]->getDestinationParameter().index; + int sampler = instruction[i]->dst.index; - samplerType[sampler] = instruction[i]->getSamplerType(); + samplerType[sampler] = instruction[i]->samplerType; } } bool interpolant[10][4] = {false}; // Interpolants in use - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - if(instruction[i]->getDestinationParameter().type == Instruction::SourceParameter::PARAMETER_TEXTURE) + if(instruction[i]->dst.type == Shader::PARAMETER_TEXTURE) { - int index = instruction[i]->getDestinationParameter().index + 2; - int mask = instruction[i]->getDestinationParameter().mask; + int index = instruction[i]->dst.index + 2; + int mask = instruction[i]->dst.mask; - switch(instruction[i]->getOpcode()) + switch(instruction[i]->opcode) { - case Instruction::Operation::OPCODE_TEX: - case Instruction::Operation::OPCODE_TEXBEM: - case Instruction::Operation::OPCODE_TEXBEML: - case Instruction::Operation::OPCODE_TEXCOORD: - case Instruction::Operation::OPCODE_TEXDP3: - case Instruction::Operation::OPCODE_TEXDP3TEX: - case Instruction::Operation::OPCODE_TEXM3X2DEPTH: - case Instruction::Operation::OPCODE_TEXM3X2PAD: - case Instruction::Operation::OPCODE_TEXM3X2TEX: - case Instruction::Operation::OPCODE_TEXM3X3: - case Instruction::Operation::OPCODE_TEXM3X3PAD: - case Instruction::Operation::OPCODE_TEXM3X3TEX: + case Shader::OPCODE_TEX: + case Shader::OPCODE_TEXBEM: + case Shader::OPCODE_TEXBEML: + case Shader::OPCODE_TEXCOORD: + case Shader::OPCODE_TEXDP3: + case Shader::OPCODE_TEXDP3TEX: + case Shader::OPCODE_TEXM3X2DEPTH: + case Shader::OPCODE_TEXM3X2PAD: + case Shader::OPCODE_TEXM3X2TEX: + case Shader::OPCODE_TEXM3X3: + case Shader::OPCODE_TEXM3X3PAD: + case Shader::OPCODE_TEXM3X3TEX: interpolant[index][0] = true; 
interpolant[index][1] = true; interpolant[index][2] = true; break; - case Instruction::Operation::OPCODE_TEXKILL: + case Shader::OPCODE_TEXKILL: if(majorVersion < 2) { interpolant[index][0] = true; @@ -255,7 +258,7 @@ interpolant[index][3] = true; } break; - case Instruction::Operation::OPCODE_TEXM3X3VSPEC: + case Shader::OPCODE_TEXM3X3VSPEC: interpolant[index][0] = true; interpolant[index][1] = true; interpolant[index][2] = true; @@ -263,7 +266,7 @@ interpolant[index - 1][3] = true; interpolant[index - 0][3] = true; break; - case Instruction::Operation::OPCODE_DCL: + case Shader::OPCODE_DCL: break; // Ignore default: // Arithmetic instruction if(version >= 0x0104) @@ -275,32 +278,32 @@ for(int argument = 0; argument < 4; argument++) { - if(instruction[i]->getSourceParameter(argument).type == Instruction::SourceParameter::PARAMETER_INPUT || - instruction[i]->getSourceParameter(argument).type == Instruction::SourceParameter::PARAMETER_TEXTURE) + if(instruction[i]->src[argument].type == Shader::PARAMETER_INPUT || + instruction[i]->src[argument].type == Shader::PARAMETER_TEXTURE) { - int index = instruction[i]->getSourceParameter(argument).index; - int swizzle = instruction[i]->getSourceParameter(argument).swizzle; - int mask = instruction[i]->getDestinationParameter().mask; + int index = instruction[i]->src[argument].index; + int swizzle = instruction[i]->src[argument].swizzle; + int mask = instruction[i]->dst.mask; - if(instruction[i]->getSourceParameter(argument).type == Instruction::SourceParameter::PARAMETER_TEXTURE) + if(instruction[i]->src[argument].type == Shader::PARAMETER_TEXTURE) { index += 2; } - switch(instruction[i]->getOpcode()) + switch(instruction[i]->opcode) { - case Instruction::Operation::OPCODE_TEX: - case Instruction::Operation::OPCODE_TEXLDD: - case Instruction::Operation::OPCODE_TEXLDL: + case Shader::OPCODE_TEX: + case Shader::OPCODE_TEXLDD: + case Shader::OPCODE_TEXLDL: { - int sampler = instruction[i]->getSourceParameter(1).index; + int 
sampler = instruction[i]->src[1].index; switch(samplerType[sampler]) { - case Instruction::Operation::SAMPLER_UNKNOWN: + case Shader::SAMPLER_UNKNOWN: if(version == 0x0104) { - if((instruction[i]->getSourceParameter(0).swizzle & 0x30) == 0x20) // .xyz + if((instruction[i]->src[0].swizzle & 0x30) == 0x20) // .xyz { interpolant[index][0] = true; interpolant[index][1] = true; @@ -318,19 +321,19 @@ ASSERT(false); } break; - case Instruction::Operation::SAMPLER_1D: + case Shader::SAMPLER_1D: interpolant[index][0] = true; break; - case Instruction::Operation::SAMPLER_2D: + case Shader::SAMPLER_2D: interpolant[index][0] = true; interpolant[index][1] = true; break; - case Instruction::Operation::SAMPLER_CUBE: + case Shader::SAMPLER_CUBE: interpolant[index][0] = true; interpolant[index][1] = true; interpolant[index][2] = true; break; - case Instruction::Operation::SAMPLER_VOLUME: + case Shader::SAMPLER_VOLUME: interpolant[index][0] = true; interpolant[index][1] = true; interpolant[index][2] = true; @@ -339,31 +342,31 @@ ASSERT(false); } - if(instruction[i]->isBias()) + if(instruction[i]->bias) { interpolant[index][3] = true; } - if(instruction[i]->isProject()) + if(instruction[i]->project) { interpolant[index][3] = true; } - if(version == 0x0104 && instruction[i]->getOpcode() == Instruction::Operation::OPCODE_TEX) + if(version == 0x0104 && instruction[i]->opcode == Shader::OPCODE_TEX) { - if(instruction[i]->getSourceParameter(0).modifier == Instruction::SourceParameter::MODIFIER_DZ) + if(instruction[i]->src[0].modifier == Shader::MODIFIER_DZ) { interpolant[index][2] = true; } - if(instruction[i]->getSourceParameter(0).modifier == Instruction::SourceParameter::MODIFIER_DW) + if(instruction[i]->src[0].modifier == Shader::MODIFIER_DW) { interpolant[index][3] = true; } } } break; - case Instruction::Operation::OPCODE_M3X2: + case Shader::OPCODE_M3X2: if(mask & 0x1) { interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x7); @@ -383,7 +386,7 @@ } } break; - case 
Instruction::Operation::OPCODE_M3X3: + case Shader::OPCODE_M3X3: if(mask & 0x1) { interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x7); @@ -411,7 +414,7 @@ } } break; - case Instruction::Operation::OPCODE_M3X4: + case Shader::OPCODE_M3X4: if(mask & 0x1) { interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x7); @@ -447,7 +450,7 @@ } } break; - case Instruction::Operation::OPCODE_M4X3: + case Shader::OPCODE_M4X3: if(mask & 0x1) { interpolant[index][0] |= swizzleContainsComponent(swizzle, 0); @@ -475,7 +478,7 @@ } } break; - case Instruction::Operation::OPCODE_M4X4: + case Shader::OPCODE_M4X4: if(mask & 0x1) { interpolant[index][0] |= swizzleContainsComponent(swizzle, 0); @@ -511,7 +514,7 @@ } } break; - case Instruction::Operation::OPCODE_CRS: + case Shader::OPCODE_CRS: if(mask & 0x1) { interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x6); @@ -536,7 +539,7 @@ interpolant[index][3] |= swizzleContainsComponentMasked(swizzle, 3, 0x3); } break; - case Instruction::Operation::OPCODE_DP2ADD: + case Shader::OPCODE_DP2ADD: if(argument == 0 || argument == 1) { interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x3); @@ -552,81 +555,81 @@ interpolant[index][3] |= swizzleContainsComponent(swizzle, 3); } break; - case Instruction::Operation::OPCODE_DP3: + case Shader::OPCODE_DP3: interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x7); interpolant[index][1] |= swizzleContainsComponentMasked(swizzle, 1, 0x7); interpolant[index][2] |= swizzleContainsComponentMasked(swizzle, 2, 0x7); interpolant[index][3] |= swizzleContainsComponentMasked(swizzle, 3, 0x7); break; - case Instruction::Operation::OPCODE_DP4: + case Shader::OPCODE_DP4: interpolant[index][0] |= swizzleContainsComponent(swizzle, 0); interpolant[index][1] |= swizzleContainsComponent(swizzle, 1); interpolant[index][2] |= swizzleContainsComponent(swizzle, 2); interpolant[index][3] |= swizzleContainsComponent(swizzle, 3); break; - 
case Instruction::Operation::OPCODE_SINCOS: - case Instruction::Operation::OPCODE_EXP: - case Instruction::Operation::OPCODE_LOG: - case Instruction::Operation::OPCODE_POW: - case Instruction::Operation::OPCODE_RCP: - case Instruction::Operation::OPCODE_RSQ: + case Shader::OPCODE_SINCOS: + case Shader::OPCODE_EXP2X: + case Shader::OPCODE_LOG2X: + case Shader::OPCODE_POWX: + case Shader::OPCODE_RCPX: + case Shader::OPCODE_RSQX: interpolant[index][0] |= swizzleContainsComponent(swizzle, 0); interpolant[index][1] |= swizzleContainsComponent(swizzle, 1); interpolant[index][2] |= swizzleContainsComponent(swizzle, 2); interpolant[index][3] |= swizzleContainsComponent(swizzle, 3); break; - case Instruction::Operation::OPCODE_NRM: + case Shader::OPCODE_NRM3: interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, 0x7 | mask); interpolant[index][1] |= swizzleContainsComponentMasked(swizzle, 1, 0x7 | mask); interpolant[index][2] |= swizzleContainsComponentMasked(swizzle, 2, 0x7 | mask); interpolant[index][3] |= swizzleContainsComponentMasked(swizzle, 3, 0x7 | mask); break; - case Instruction::Operation::OPCODE_MOV: - case Instruction::Operation::OPCODE_ADD: - case Instruction::Operation::OPCODE_SUB: - case Instruction::Operation::OPCODE_MUL: - case Instruction::Operation::OPCODE_MAD: - case Instruction::Operation::OPCODE_ABS: - case Instruction::Operation::OPCODE_CMP: - case Instruction::Operation::OPCODE_CND: - case Instruction::Operation::OPCODE_FRC: - case Instruction::Operation::OPCODE_LRP: - case Instruction::Operation::OPCODE_MAX: - case Instruction::Operation::OPCODE_MIN: - case Instruction::Operation::OPCODE_SETP: - case Instruction::Operation::OPCODE_BREAKC: - case Instruction::Operation::OPCODE_DSX: - case Instruction::Operation::OPCODE_DSY: + case Shader::OPCODE_MOV: + case Shader::OPCODE_ADD: + case Shader::OPCODE_SUB: + case Shader::OPCODE_MUL: + case Shader::OPCODE_MAD: + case Shader::OPCODE_ABS: + case Shader::OPCODE_CMP0: + case 
Shader::OPCODE_CND: + case Shader::OPCODE_FRC: + case Shader::OPCODE_LRP: + case Shader::OPCODE_MAX: + case Shader::OPCODE_MIN: + case Shader::OPCODE_CMP: + case Shader::OPCODE_BREAKC: + case Shader::OPCODE_DFDX: + case Shader::OPCODE_DFDY: interpolant[index][0] |= swizzleContainsComponentMasked(swizzle, 0, mask); interpolant[index][1] |= swizzleContainsComponentMasked(swizzle, 1, mask); interpolant[index][2] |= swizzleContainsComponentMasked(swizzle, 2, mask); interpolant[index][3] |= swizzleContainsComponentMasked(swizzle, 3, mask); break; - case Instruction::Operation::OPCODE_TEXCOORD: + case Shader::OPCODE_TEXCOORD: interpolant[index][0] = true; interpolant[index][1] = true; interpolant[index][2] = true; interpolant[index][3] = true; break; - case Instruction::Operation::OPCODE_TEXDP3: - case Instruction::Operation::OPCODE_TEXDP3TEX: - case Instruction::Operation::OPCODE_TEXM3X2PAD: - case Instruction::Operation::OPCODE_TEXM3X3PAD: - case Instruction::Operation::OPCODE_TEXM3X2TEX: - case Instruction::Operation::OPCODE_TEXM3X3SPEC: - case Instruction::Operation::OPCODE_TEXM3X3VSPEC: - case Instruction::Operation::OPCODE_TEXBEM: - case Instruction::Operation::OPCODE_TEXBEML: - case Instruction::Operation::OPCODE_TEXM3X2DEPTH: - case Instruction::Operation::OPCODE_TEXM3X3: - case Instruction::Operation::OPCODE_TEXM3X3TEX: + case Shader::OPCODE_TEXDP3: + case Shader::OPCODE_TEXDP3TEX: + case Shader::OPCODE_TEXM3X2PAD: + case Shader::OPCODE_TEXM3X3PAD: + case Shader::OPCODE_TEXM3X2TEX: + case Shader::OPCODE_TEXM3X3SPEC: + case Shader::OPCODE_TEXM3X3VSPEC: + case Shader::OPCODE_TEXBEM: + case Shader::OPCODE_TEXBEML: + case Shader::OPCODE_TEXM3X2DEPTH: + case Shader::OPCODE_TEXM3X3: + case Shader::OPCODE_TEXM3X3TEX: interpolant[index][0] = true; interpolant[index][1] = true; interpolant[index][2] = true; break; - case Instruction::Operation::OPCODE_TEXREG2AR: - case Instruction::Operation::OPCODE_TEXREG2GB: - case Instruction::Operation::OPCODE_TEXREG2RGB: + case 
Shader::OPCODE_TEXREG2AR: + case Shader::OPCODE_TEXREG2GB: + case Shader::OPCODE_TEXREG2RGB: break; default: // ASSERT(false); // Refine component usage @@ -652,40 +655,25 @@ } else // Shader Model 3.0 input declaration; v# indexable { - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - if(instruction[i]->getOpcode() == ShaderOperation::OPCODE_DCL) + if(instruction[i]->opcode == Shader::OPCODE_DCL) { - if(instruction[i]->getDestinationParameter().type == ShaderParameter::PARAMETER_INPUT) + if(instruction[i]->dst.type == Shader::PARAMETER_INPUT) { - unsigned char usage = instruction[i]->getUsage(); - unsigned char index = instruction[i]->getUsageIndex(); - unsigned char mask = instruction[i]->getDestinationParameter().mask; - unsigned char reg = instruction[i]->getDestinationParameter().index; + unsigned char usage = instruction[i]->usage; + unsigned char index = instruction[i]->usageIndex; + unsigned char mask = instruction[i]->dst.mask; + unsigned char reg = instruction[i]->dst.index; - if(mask & 0x01) - { - semantic[reg][0] = Semantic(usage, index); - } - - if(mask & 0x02) - { - semantic[reg][1] = Semantic(usage, index); - } - - if(mask & 0x04) - { - semantic[reg][2] = Semantic(usage, index); - } - - if(mask & 0x08) - { - semantic[reg][3] = Semantic(usage, index); - } + if(mask & 0x01) semantic[reg][0] = Semantic(usage, index); + if(mask & 0x02) semantic[reg][1] = Semantic(usage, index); + if(mask & 0x04) semantic[reg][2] = Semantic(usage, index); + if(mask & 0x08) semantic[reg][3] = Semantic(usage, index); } - else if(instruction[i]->getDestinationParameter().type == ShaderParameter::PARAMETER_MISCTYPE) + else if(instruction[i]->dst.type == Shader::PARAMETER_MISCTYPE) { - unsigned char index = instruction[i]->getDestinationParameter().index; + unsigned char index = instruction[i]->dst.index; if(index == 0) { @@ -703,19 +691,19 @@ if(version >= 0x0200) { - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < 
instruction.size(); i++) { - if(instruction[i]->getOpcode() == ShaderOperation::OPCODE_DCL) + if(instruction[i]->opcode == Shader::OPCODE_DCL) { - bool centroid = instruction[i]->getDestinationParameter().centroid; - unsigned char reg = instruction[i]->getDestinationParameter().index; + bool centroid = instruction[i]->dst.centroid; + unsigned char reg = instruction[i]->dst.index; - switch(instruction[i]->getDestinationParameter().type) + switch(instruction[i]->dst.type) { - case ShaderParameter::PARAMETER_INPUT: + case Shader::PARAMETER_INPUT: semantic[reg][0].centroid = centroid; break; - case ShaderParameter::PARAMETER_TEXTURE: + case Shader::PARAMETER_TEXTURE: semantic[2 + reg][0].centroid = centroid; break; }
diff --git a/src/Shader/PixelShader.hpp b/src/Shader/PixelShader.hpp index a0f9b99..83ca253 100644 --- a/src/Shader/PixelShader.hpp +++ b/src/Shader/PixelShader.hpp
@@ -1,54 +1,53 @@ -// SwiftShader Software Renderer -// -// Copyright(c) 2005-2011 TransGaming Inc. -// -// All rights reserved. No part of this software may be copied, distributed, transmitted, -// transcribed, stored in a retrieval system, translated into any human or computer -// language by any means, or disclosed to third parties without the explicit written -// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express -// or implied, including but not limited to any patent rights, are granted to you. -// - -#ifndef sw_PixelShader_hpp -#define sw_PixelShader_hpp - -#include "Shader.hpp" - -namespace sw -{ - class PixelShader : public Shader - { - public: - PixelShader(const unsigned long *token); - - virtual ~PixelShader(); - - static int validate(const unsigned long *const token); // Returns number of instructions if valid - bool depthOverride() const; - bool containsTexkill() const; - bool containsCentroid() const; - bool usesDiffuse(int component) const; - bool usesSpecular(int component) const; - bool usesTexture(int coordinate, int component) const; - - Semantic semantic[10][4]; // FIXME: Private - - bool vPosDeclared; - bool vFaceDeclared; - - private: - void parse(const unsigned long *token); - - void analyzeZOverride(); - void analyzeTexkill(); - void analyzeInterpolants(); - - bool zOverride; - bool texkill; - bool centroid; - }; - - typedef PixelShader::Instruction PixelShaderInstruction; -} - -#endif // sw_PixelShader_hpp +// SwiftShader Software Renderer +// +// Copyright(c) 2005-2012 TransGaming Inc. +// +// All rights reserved. No part of this software may be copied, distributed, transmitted, +// transcribed, stored in a retrieval system, translated into any human or computer +// language by any means, or disclosed to third parties without the explicit written +// agreement of TransGaming Inc. 
Without such an agreement, no rights or licenses, express +// or implied, including but not limited to any patent rights, are granted to you. +// + +#ifndef sw_PixelShader_hpp +#define sw_PixelShader_hpp + +#include "Shader.hpp" + +namespace sw +{ + class PixelShader : public Shader + { + public: + explicit PixelShader(const PixelShader *ps = 0); + explicit PixelShader(const unsigned long *token); + + virtual ~PixelShader(); + + static int validate(const unsigned long *const token); // Returns number of instructions if valid + bool depthOverride() const; + bool containsKill() const; + bool containsCentroid() const; + bool usesDiffuse(int component) const; + bool usesSpecular(int component) const; + bool usesTexture(int coordinate, int component) const; + + virtual void analyze(); + + Semantic semantic[10][4]; // FIXME: Private + + bool vPosDeclared; + bool vFaceDeclared; + + private: + void analyzeZOverride(); + void analyzeKill(); + void analyzeInterpolants(); + + bool zOverride; + bool kill; + bool centroid; + }; +} + +#endif // sw_PixelShader_hpp
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp index b7055c4..dbd98b8 100644 --- a/src/Shader/SamplerCore.cpp +++ b/src/Shader/SamplerCore.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -20,7 +20,7 @@ { } - void SamplerCore::sampleTexture(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool bias, bool fixed12, bool gradients, bool lodProvided) + void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool bias, bool fixed12, bool gradients, bool lodProvided) { #if PERF_PROFILE AddAtomic(Pointer<Long>(&profiler.texOperations), Long(4)); @@ -40,17 +40,17 @@ if(state.textureType == TEXTURE_NULL) { - c.r = Short4(0x0000, 0x0000, 0x0000, 0x0000); - c.g = Short4(0x0000, 0x0000, 0x0000, 0x0000); - c.b = Short4(0x0000, 0x0000, 0x0000, 0x0000); + c.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); + c.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); + c.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); if(fixed12) // FIXME: Convert to fixed12 at higher level, when required { - c.a = Short4(0x1000, 0x1000, 0x1000, 0x1000); + c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000); } else { - c.a = Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME + c.w = Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME } } else @@ -87,8 +87,8 @@ if(cubeTexture) { - uuuu += Float4(0.5f, 0.5f, 0.5f, 0.5f); - vvvv += Float4(0.5f, 0.5f, 0.5f, 0.5f); + uuuu += Float4(0.5f); + vvvv += Float4(0.5f); } if(!hasFloatTexture()) @@ -97,7 +97,7 @@ } else { - Color4f cf; + Vector4f cf; sampleFloatFilter(texture, cf, uuuu, vvvv, wwww, lod, anisotropy, uDelta, vDelta, face, lodProvided); @@ -144,40 +144,40 @@ case FORMAT_G8R8: case FORMAT_G16R16: case FORMAT_A16B16G16R16: - if(componentCount < 2) c.g = 
Short4(0x1000, 0x1000, 0x1000, 0x1000); - if(componentCount < 3) c.b = Short4(0x1000, 0x1000, 0x1000, 0x1000); - if(componentCount < 4) c.a = Short4(0x1000, 0x1000, 0x1000, 0x1000); + if(componentCount < 2) c.y = Short4(0x1000, 0x1000, 0x1000, 0x1000); + if(componentCount < 3) c.z = Short4(0x1000, 0x1000, 0x1000, 0x1000); + if(componentCount < 4) c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000); break; case FORMAT_A8: - c.a = c.r; - c.r = Short4(0x0000, 0x0000, 0x0000, 0x0000); - c.g = Short4(0x0000, 0x0000, 0x0000, 0x0000); - c.b = Short4(0x0000, 0x0000, 0x0000, 0x0000); + c.w = c.x; + c.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); + c.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); + c.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); break; case FORMAT_L8: case FORMAT_L16: - c.g = c.r; - c.b = c.r; - c.a = Short4(0x1000, 0x1000, 0x1000, 0x1000); + c.y = c.x; + c.z = c.x; + c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000); break; case FORMAT_A8L8: - c.a = c.g; - c.g = c.r; - c.b = c.r; + c.w = c.y; + c.y = c.x; + c.z = c.x; break; case FORMAT_R32F: - c.g = Short4(0x1000, 0x1000, 0x1000, 0x1000); + c.y = Short4(0x1000, 0x1000, 0x1000, 0x1000); case FORMAT_G32R32F: - c.b = Short4(0x1000, 0x1000, 0x1000, 0x1000); - c.a = Short4(0x1000, 0x1000, 0x1000, 0x1000); + c.z = Short4(0x1000, 0x1000, 0x1000, 0x1000); + c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000); case FORMAT_A32B32G32R32F: break; case FORMAT_D32F_LOCKABLE: case FORMAT_D32F_TEXTURE: case FORMAT_D32F_SHADOW: - c.g = c.r; - c.b = c.r; - c.a = c.r; + c.y = c.x; + c.z = c.x; + c.w = c.x; break; default: ASSERT(false); @@ -186,7 +186,7 @@ } } - void SamplerCore::sampleTexture(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool bias, bool gradients, bool lodProvided) + void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool bias, bool gradients, bool lodProvided) { #if PERF_PROFILE 
AddAtomic(Pointer<Long>(&profiler.texOperations), Long(4)); @@ -202,10 +202,10 @@ if(state.textureType == TEXTURE_NULL) { - c.r = Float4(0.0f, 0.0f, 0.0f, 0.0f); - c.g = Float4(0.0f, 0.0f, 0.0f, 0.0f); - c.b = Float4(0.0f, 0.0f, 0.0f, 0.0f); - c.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + c.x = Float4(0.0f); + c.y = Float4(0.0f); + c.z = Float4(0.0f); + c.w = Float4(1.0f); } else { @@ -247,15 +247,15 @@ if(cubeTexture) { - uuuu += Float4(0.5f, 0.5f, 0.5f, 0.5f); - vvvv += Float4(0.5f, 0.5f, 0.5f, 0.5f); + uuuu += Float4(0.5f); + vvvv += Float4(0.5f); } sampleFloatFilter(texture, c, uuuu, vvvv, wwww, lod, anisotropy, uDelta, vDelta, face, lodProvided); } else { - Color4i ci; + Vector4i ci; sampleTexture(texture, ci, u, v, w, q, dsx, dsy, bias, false, gradients, lodProvided); @@ -298,40 +298,40 @@ case FORMAT_G8R8: case FORMAT_G16R16: case FORMAT_A16B16G16R16: - if(componentCount < 2) c.g = Float4(1.0f, 1.0f, 1.0f, 1.0f); - if(componentCount < 3) c.b = Float4(1.0f, 1.0f, 1.0f, 1.0f); - if(componentCount < 4) c.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + if(componentCount < 2) c.y = Float4(1.0f); + if(componentCount < 3) c.z = Float4(1.0f); + if(componentCount < 4) c.w = Float4(1.0f); break; case FORMAT_A8: - c.a = c.r; - c.r = Float4(0.0f, 0.0f, 0.0f, 0.0f); - c.g = Float4(0.0f, 0.0f, 0.0f, 0.0f); - c.b = Float4(0.0f, 0.0f, 0.0f, 0.0f); + c.w = c.x; + c.x = Float4(0.0f); + c.y = Float4(0.0f); + c.z = Float4(0.0f); break; case FORMAT_L8: case FORMAT_L16: - c.g = c.r; - c.b = c.r; - c.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + c.y = c.x; + c.z = c.x; + c.w = Float4(1.0f); break; case FORMAT_A8L8: - c.a = c.g; - c.g = c.r; - c.b = c.r; + c.w = c.y; + c.y = c.x; + c.z = c.x; break; case FORMAT_R32F: - c.g = Float4(1.0f, 1.0f, 1.0f, 1.0f); + c.y = Float4(1.0f); case FORMAT_G32R32F: - c.b = Float4(1.0f, 1.0f, 1.0f, 1.0f); - c.a = Float4(1.0f, 1.0f, 1.0f, 1.0f); + c.z = Float4(1.0f); + c.w = Float4(1.0f); case FORMAT_A32B32G32R32F: break; case FORMAT_D32F_LOCKABLE: case FORMAT_D32F_TEXTURE: 
case FORMAT_D32F_SHADOW: - c.g = c.r; - c.b = c.r; - c.a = c.r; + c.y = c.x; + c.z = c.x; + c.w = c.x; break; default: ASSERT(false); @@ -377,7 +377,7 @@ return uvw; } - void SamplerCore::sampleFilter(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided) + void SamplerCore::sampleFilter(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided) { bool volumeTexture = state.textureType == TEXTURE_3D; @@ -385,7 +385,7 @@ if(state.mipmapFilter > MIPMAP_POINT) { - Color4i cc; + Vector4i cc; sampleAniso(texture, cc, u, v, w, lod, anisotropy, uDelta, vDelta, face, true, lodProvided); @@ -394,28 +394,28 @@ UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize Short4 stri = utri >> 1; // FIXME: Optimize - if(hasUnsignedTextureComponent(0)) cc.r = MulHigh(As<UShort4>(cc.r), utri); else cc.r = MulHigh(cc.r, stri); - if(hasUnsignedTextureComponent(1)) cc.g = MulHigh(As<UShort4>(cc.g), utri); else cc.g = MulHigh(cc.g, stri); - if(hasUnsignedTextureComponent(2)) cc.b = MulHigh(As<UShort4>(cc.b), utri); else cc.b = MulHigh(cc.b, stri); - if(hasUnsignedTextureComponent(3)) cc.a = MulHigh(As<UShort4>(cc.a), utri); else cc.a = MulHigh(cc.a, stri); + if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri); + if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri); + if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri); + if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri); utri = ~utri; stri = Short4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF) - stri; - if(hasUnsignedTextureComponent(0)) c.r = MulHigh(As<UShort4>(c.r), utri); else c.r = MulHigh(c.r, stri); - 
if(hasUnsignedTextureComponent(1)) c.g = MulHigh(As<UShort4>(c.g), utri); else c.g = MulHigh(c.g, stri); - if(hasUnsignedTextureComponent(2)) c.b = MulHigh(As<UShort4>(c.b), utri); else c.b = MulHigh(c.b, stri); - if(hasUnsignedTextureComponent(3)) c.a = MulHigh(As<UShort4>(c.a), utri); else c.a = MulHigh(c.a, stri); + if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri); + if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri); + if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri); + if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri); - c.r += cc.r; - c.g += cc.g; - c.b += cc.b; - c.a += cc.a; + c.x += cc.x; + c.y += cc.y; + c.z += cc.z; + c.w += cc.w; - if(!hasUnsignedTextureComponent(0)) c.r += c.r; - if(!hasUnsignedTextureComponent(1)) c.g += c.g; - if(!hasUnsignedTextureComponent(2)) c.b += c.b; - if(!hasUnsignedTextureComponent(3)) c.a += c.a; + if(!hasUnsignedTextureComponent(0)) c.x += c.x; + if(!hasUnsignedTextureComponent(1)) c.y += c.y; + if(!hasUnsignedTextureComponent(2)) c.z += c.z; + if(!hasUnsignedTextureComponent(3)) c.w += c.w; } Short4 borderMask; @@ -468,14 +468,14 @@ { Short4 b; - c.r = borderMask & c.r | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1)); - c.g = borderMask & c.g | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1)); - c.b = borderMask & c.b | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)); - c.a = borderMask & c.a | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 
0 : 1)); + c.x = borderMask & c.x | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1)); + c.y = borderMask & c.y | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1)); + c.z = borderMask & c.z | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)); + c.w = borderMask & c.w | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1)); } } - void SamplerCore::sampleAniso(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided) + void SamplerCore::sampleAniso(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided) { if(state.textureFilter != FILTER_ANISOTROPIC || lodProvided) { @@ -485,12 +485,12 @@ { Int a = RoundInt(anisotropy); - Color4i cSum; + Vector4i cSum; - cSum.r = Short4(0, 0, 0, 0); - cSum.g = Short4(0, 0, 0, 0); - cSum.b = Short4(0, 0, 0, 0); - cSum.a = Short4(0, 0, 0, 0); + cSum.x = Short4(0, 0, 0, 0); + cSum.y = Short4(0, 0, 0, 0); + cSum.z = Short4(0, 0, 0, 0); + cSum.w = Short4(0, 0, 0, 0); Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); @@ -515,23 +515,23 @@ u0 += du; v0 += dv; - if(hasUnsignedTextureComponent(0)) cSum.r += As<Short4>(MulHigh(As<UShort4>(c.r), cw)); else cSum.r += MulHigh(c.r, sw); - if(hasUnsignedTextureComponent(1)) cSum.g += As<Short4>(MulHigh(As<UShort4>(c.g), cw)); else cSum.g += MulHigh(c.g, sw); - if(hasUnsignedTextureComponent(2)) cSum.b += As<Short4>(MulHigh(As<UShort4>(c.b), cw)); else cSum.b += MulHigh(c.b, sw); - 
if(hasUnsignedTextureComponent(3)) cSum.a += As<Short4>(MulHigh(As<UShort4>(c.a), cw)); else cSum.a += MulHigh(c.a, sw); + if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw); + if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw); + if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw); + if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw); i++; } Until(i >= a) - if(hasUnsignedTextureComponent(0)) c.r = cSum.r; else c.r = AddSat(cSum.r, cSum.r); - if(hasUnsignedTextureComponent(1)) c.g = cSum.g; else c.g = AddSat(cSum.g, cSum.g); - if(hasUnsignedTextureComponent(2)) c.b = cSum.b; else c.b = AddSat(cSum.b, cSum.b); - if(hasUnsignedTextureComponent(3)) c.a = cSum.a; else c.a = AddSat(cSum.a, cSum.a); + if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x); + if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y); + if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z); + if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w); } } - void SamplerCore::sampleQuad(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD) + void SamplerCore::sampleQuad(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD) { if(state.textureType != TEXTURE_3D) { @@ -543,7 +543,7 @@ } } - void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float &lod, Int face[4], bool secondLOD) + void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float &lod, Int face[4], bool secondLOD) { int componentCount = textureComponentCount(); bool gather = 
state.textureFilter == FILTER_GATHER; @@ -565,10 +565,10 @@ } else { - Color4i c0; - Color4i c1; - Color4i c2; - Color4i c3; + Vector4i c0; + Vector4i c1; + Vector4i c2; + Vector4i c3; Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), (AddressingMode)state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1); Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), (AddressingMode)state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1); @@ -624,29 +624,29 @@ { if(has16bitTexture() && hasUnsignedTextureComponent(0)) { - c0.r = As<UShort4>(c0.r) - MulHigh(As<UShort4>(c0.r), f0u) + MulHigh(As<UShort4>(c1.r), f0u); - c2.r = As<UShort4>(c2.r) - MulHigh(As<UShort4>(c2.r), f0u) + MulHigh(As<UShort4>(c3.r), f0u); - c.r = As<UShort4>(c0.r) - MulHigh(As<UShort4>(c0.r), f0v) + MulHigh(As<UShort4>(c2.r), f0v); + c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u); + c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u); + c.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v); } else { if(hasUnsignedTextureComponent(0)) { - c0.r = MulHigh(As<UShort4>(c0.r), As<UShort4>(f1u1v)); - c1.r = MulHigh(As<UShort4>(c1.r), As<UShort4>(f0u1v)); - c2.r = MulHigh(As<UShort4>(c2.r), As<UShort4>(f1u0v)); - c3.r = MulHigh(As<UShort4>(c3.r), As<UShort4>(f0u0v)); + c0.x = MulHigh(As<UShort4>(c0.x), As<UShort4>(f1u1v)); + c1.x = MulHigh(As<UShort4>(c1.x), As<UShort4>(f0u1v)); + c2.x = MulHigh(As<UShort4>(c2.x), As<UShort4>(f1u0v)); + c3.x = MulHigh(As<UShort4>(c3.x), As<UShort4>(f0u0v)); } else { - c0.r = MulHigh(c0.r, f1u1vs); - c1.r = MulHigh(c1.r, f0u1vs); - c2.r = MulHigh(c2.r, f1u0vs); - c3.r = MulHigh(c3.r, f0u0vs); + c0.x = MulHigh(c0.x, f1u1vs); + c1.x = MulHigh(c1.x, f0u1vs); + c2.x = MulHigh(c2.x, f1u0vs); + c3.x = MulHigh(c3.x, f0u0vs); } - c.r = (c0.r + c1.r) + (c2.r + c3.r); - if(!hasUnsignedTextureComponent(0)) c.r = AddSat(c.r, c.r); // 
Correct for signed fractions + c.x = (c0.x + c1.x) + (c2.x + c3.x); + if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions } } @@ -654,29 +654,29 @@ { if(has16bitTexture() && hasUnsignedTextureComponent(1)) { - c0.g = As<UShort4>(c0.g) - MulHigh(As<UShort4>(c0.g), f0u) + MulHigh(As<UShort4>(c1.g), f0u); - c2.g = As<UShort4>(c2.g) - MulHigh(As<UShort4>(c2.g), f0u) + MulHigh(As<UShort4>(c3.g), f0u); - c.g = As<UShort4>(c0.g) - MulHigh(As<UShort4>(c0.g), f0v) + MulHigh(As<UShort4>(c2.g), f0v); + c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u); + c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u); + c.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v); } else { if(hasUnsignedTextureComponent(1)) { - c0.g = MulHigh(As<UShort4>(c0.g), As<UShort4>(f1u1v)); - c1.g = MulHigh(As<UShort4>(c1.g), As<UShort4>(f0u1v)); - c2.g = MulHigh(As<UShort4>(c2.g), As<UShort4>(f1u0v)); - c3.g = MulHigh(As<UShort4>(c3.g), As<UShort4>(f0u0v)); + c0.y = MulHigh(As<UShort4>(c0.y), As<UShort4>(f1u1v)); + c1.y = MulHigh(As<UShort4>(c1.y), As<UShort4>(f0u1v)); + c2.y = MulHigh(As<UShort4>(c2.y), As<UShort4>(f1u0v)); + c3.y = MulHigh(As<UShort4>(c3.y), As<UShort4>(f0u0v)); } else { - c0.g = MulHigh(c0.g, f1u1vs); - c1.g = MulHigh(c1.g, f0u1vs); - c2.g = MulHigh(c2.g, f1u0vs); - c3.g = MulHigh(c3.g, f0u0vs); + c0.y = MulHigh(c0.y, f1u1vs); + c1.y = MulHigh(c1.y, f0u1vs); + c2.y = MulHigh(c2.y, f1u0vs); + c3.y = MulHigh(c3.y, f0u0vs); } - c.g = (c0.g + c1.g) + (c2.g + c3.g); - if(!hasUnsignedTextureComponent(1)) c.g = AddSat(c.g, c.g); // Correct for signed fractions + c.y = (c0.y + c1.y) + (c2.y + c3.y); + if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions } } @@ -684,29 +684,29 @@ { if(has16bitTexture() && hasUnsignedTextureComponent(2)) { - c0.b = As<UShort4>(c0.b) - MulHigh(As<UShort4>(c0.b), 
f0u) + MulHigh(As<UShort4>(c1.b), f0u); - c2.b = As<UShort4>(c2.b) - MulHigh(As<UShort4>(c2.b), f0u) + MulHigh(As<UShort4>(c3.b), f0u); - c.b = As<UShort4>(c0.b) - MulHigh(As<UShort4>(c0.b), f0v) + MulHigh(As<UShort4>(c2.b), f0v); + c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u); + c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u); + c.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v); } else { if(hasUnsignedTextureComponent(2)) { - c0.b = MulHigh(As<UShort4>(c0.b), As<UShort4>(f1u1v)); - c1.b = MulHigh(As<UShort4>(c1.b), As<UShort4>(f0u1v)); - c2.b = MulHigh(As<UShort4>(c2.b), As<UShort4>(f1u0v)); - c3.b = MulHigh(As<UShort4>(c3.b), As<UShort4>(f0u0v)); + c0.z = MulHigh(As<UShort4>(c0.z), As<UShort4>(f1u1v)); + c1.z = MulHigh(As<UShort4>(c1.z), As<UShort4>(f0u1v)); + c2.z = MulHigh(As<UShort4>(c2.z), As<UShort4>(f1u0v)); + c3.z = MulHigh(As<UShort4>(c3.z), As<UShort4>(f0u0v)); } else { - c0.b = MulHigh(c0.b, f1u1vs); - c1.b = MulHigh(c1.b, f0u1vs); - c2.b = MulHigh(c2.b, f1u0vs); - c3.b = MulHigh(c3.b, f0u0vs); + c0.z = MulHigh(c0.z, f1u1vs); + c1.z = MulHigh(c1.z, f0u1vs); + c2.z = MulHigh(c2.z, f1u0vs); + c3.z = MulHigh(c3.z, f0u0vs); } - c.b = (c0.b + c1.b) + (c2.b + c3.b); - if(!hasUnsignedTextureComponent(2)) c.b = AddSat(c.b, c.b); // Correct for signed fractions + c.z = (c0.z + c1.z) + (c2.z + c3.z); + if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions } } @@ -714,43 +714,43 @@ { if(has16bitTexture() && hasUnsignedTextureComponent(3)) { - c0.a = As<UShort4>(c0.a) - MulHigh(As<UShort4>(c0.a), f0u) + MulHigh(As<UShort4>(c1.a), f0u); - c2.a = As<UShort4>(c2.a) - MulHigh(As<UShort4>(c2.a), f0u) + MulHigh(As<UShort4>(c3.a), f0u); - c.a = As<UShort4>(c0.a) - MulHigh(As<UShort4>(c0.a), f0v) + MulHigh(As<UShort4>(c2.a), f0v); + c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + 
MulHigh(As<UShort4>(c1.w), f0u); + c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u); + c.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v); } else { if(hasUnsignedTextureComponent(3)) { - c0.a = MulHigh(As<UShort4>(c0.a), As<UShort4>(f1u1v)); - c1.a = MulHigh(As<UShort4>(c1.a), As<UShort4>(f0u1v)); - c2.a = MulHigh(As<UShort4>(c2.a), As<UShort4>(f1u0v)); - c3.a = MulHigh(As<UShort4>(c3.a), As<UShort4>(f0u0v)); + c0.w = MulHigh(As<UShort4>(c0.w), As<UShort4>(f1u1v)); + c1.w = MulHigh(As<UShort4>(c1.w), As<UShort4>(f0u1v)); + c2.w = MulHigh(As<UShort4>(c2.w), As<UShort4>(f1u0v)); + c3.w = MulHigh(As<UShort4>(c3.w), As<UShort4>(f0u0v)); } else { - c0.a = MulHigh(c0.a, f1u1vs); - c1.a = MulHigh(c1.a, f0u1vs); - c2.a = MulHigh(c2.a, f1u0vs); - c3.a = MulHigh(c3.a, f0u0vs); + c0.w = MulHigh(c0.w, f1u1vs); + c1.w = MulHigh(c1.w, f0u1vs); + c2.w = MulHigh(c2.w, f1u0vs); + c3.w = MulHigh(c3.w, f0u0vs); } - c.a = (c0.a + c1.a) + (c2.a + c3.a); - if(!hasUnsignedTextureComponent(3)) c.a = AddSat(c.a, c.a); // Correct for signed fractions + c.w = (c0.w + c1.w) + (c2.w + c3.w); + if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions } } } else { - c.r = c1.r; - c.g = c2.r; - c.b = c3.r; - c.a = c0.r; + c.x = c1.x; + c.y = c2.x; + c.z = c3.x; + c.w = c0.x; } } } - void SamplerCore::sample3D(Pointer<Byte> &texture, Color4i &c_, Float4 &u_, Float4 &v_, Float4 &w_, Float &lod, bool secondLOD) + void SamplerCore::sample3D(Pointer<Byte> &texture, Vector4i &c_, Float4 &u_, Float4 &v_, Float4 &w_, Float &lod, bool secondLOD) { int componentCount = textureComponentCount(); @@ -774,7 +774,7 @@ } else { - Color4i c[2][2][2]; + Vector4i c[2][2][2]; Short4 u[2][2][2]; Short4 v[2][2][2]; @@ -863,36 +863,36 @@ { sampleTexel(c[i][j][k], u[i][j][k], v[i][j][k], s[i][j][k], mipmap, buffer); - if(componentCount >= 1) if(hasUnsignedTextureComponent(0)) c[i][j][k].r = 
MulHigh(As<UShort4>(c[i][j][k].r), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].r = MulHigh(c[i][j][k].r, fs[1 - i][1 - j][1 - k]); - if(componentCount >= 2) if(hasUnsignedTextureComponent(1)) c[i][j][k].g = MulHigh(As<UShort4>(c[i][j][k].g), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].g = MulHigh(c[i][j][k].g, fs[1 - i][1 - j][1 - k]); - if(componentCount >= 3) if(hasUnsignedTextureComponent(2)) c[i][j][k].b = MulHigh(As<UShort4>(c[i][j][k].b), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].b = MulHigh(c[i][j][k].b, fs[1 - i][1 - j][1 - k]); - if(componentCount >= 4) if(hasUnsignedTextureComponent(3)) c[i][j][k].a = MulHigh(As<UShort4>(c[i][j][k].a), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].a = MulHigh(c[i][j][k].a, fs[1 - i][1 - j][1 - k]); + if(componentCount >= 1) if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); + if(componentCount >= 2) if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); + if(componentCount >= 3) if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); + if(componentCount >= 4) if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), As<UShort4>(f[1 - i][1 - j][1 - k])); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); if(i != 0 || j != 0 || k != 0) { - if(componentCount >= 1) c[0][0][0].r += c[i][j][k].r; - if(componentCount >= 2) c[0][0][0].g += c[i][j][k].g; - if(componentCount >= 3) c[0][0][0].b += c[i][j][k].b; - if(componentCount >= 4) c[0][0][0].a += c[i][j][k].a; + if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x; + if(componentCount >= 2) 
c[0][0][0].y += c[i][j][k].y; + if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z; + if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w; } } } } - if(componentCount >= 1) c_.r = c[0][0][0].r; - if(componentCount >= 2) c_.g = c[0][0][0].g; - if(componentCount >= 3) c_.b = c[0][0][0].b; - if(componentCount >= 4) c_.a = c[0][0][0].a; + if(componentCount >= 1) c_.x = c[0][0][0].x; + if(componentCount >= 2) c_.y = c[0][0][0].y; + if(componentCount >= 3) c_.z = c[0][0][0].z; + if(componentCount >= 4) c_.w = c[0][0][0].w; // Correct for signed fractions - if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.r = AddSat(c_.r, c_.r); - if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.g = AddSat(c_.g, c_.g); - if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.b = AddSat(c_.b, c_.b); - if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.a = AddSat(c_.a, c_.a); + if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x); + if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y); + if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z); + if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w); } } - void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided) + void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided) { bool volumeTexture = state.textureType == TEXTURE_3D; @@ -900,16 +900,16 @@ if(state.mipmapFilter > MIPMAP_POINT) { - Color4f cc; + Vector4f cc; sampleFloatAniso(texture, cc, u, v, w, lod, anisotropy, uDelta, vDelta, face, true, lodProvided); - Float4 lod4 = Float4(Fraction(lod)); + Float4 lod4 = Float4(Frac(lod)); - 
c.r = (cc.r - c.r) * lod4 + c.r; - c.g = (cc.g - c.g) * lod4 + c.g; - c.b = (cc.b - c.b) * lod4 + c.b; - c.a = (cc.a - c.a) * lod4 + c.a; + c.x = (cc.x - c.x) * lod4 + c.x; + c.y = (cc.y - c.y) * lod4 + c.y; + c.z = (cc.z - c.z) * lod4 + c.z; + c.w = (cc.w - c.w) * lod4 + c.w; } Int4 borderMask; @@ -962,14 +962,14 @@ { Int4 b; - c.r = As<Float4>(borderMask & As<Int4>(c.r) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))); - c.g = As<Float4>(borderMask & As<Int4>(c.g) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))); - c.b = As<Float4>(borderMask & As<Int4>(c.b) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))); - c.a = As<Float4>(borderMask & As<Int4>(c.a) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))); + c.x = As<Float4>(borderMask & As<Int4>(c.x) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))); + c.y = As<Float4>(borderMask & As<Int4>(c.y) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))); + c.z = As<Float4>(borderMask & As<Int4>(c.z) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))); + c.w = As<Float4>(borderMask & As<Int4>(c.w) | ~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))); } } - void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided) + void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided) { if(state.textureFilter != FILTER_ANISOTROPIC || lodProvided) { @@ -979,12 +979,12 @@ { Int a = RoundInt(anisotropy); - Color4f cSum; + Vector4f cSum; - cSum.r = Float4(0, 0, 0, 0); - cSum.g = Float4(0, 0, 0, 0); - cSum.b = Float4(0, 0, 0, 0); - cSum.a = 
Float4(0, 0, 0, 0); + cSum.x = Float4(0.0f); + cSum.y = Float4(0.0f); + cSum.z = Float4(0.0f); + cSum.w = Float4(0.0f); Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); @@ -1007,23 +1007,23 @@ u0 += du; v0 += dv; - cSum.r += c.r * A; - cSum.g += c.g * A; - cSum.b += c.b * A; - cSum.a += c.a * A; + cSum.x += c.x * A; + cSum.y += c.y * A; + cSum.z += c.z * A; + cSum.w += c.w * A; i++; } Until(i >= a) - c.r = cSum.r; - c.g = cSum.g; - c.b = cSum.b; - c.a = cSum.a; + c.x = cSum.x; + c.y = cSum.y; + c.z = cSum.z; + c.w = cSum.w; } } - void SamplerCore::sampleFloat(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD) + void SamplerCore::sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD) { if(state.textureType != TEXTURE_3D) { @@ -1035,7 +1035,7 @@ } } - void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &z, Float &lod, Int face[4], bool secondLOD) + void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &z, Float &lod, Int face[4], bool secondLOD) { int componentCount = textureComponentCount(); bool gather = state.textureFilter == FILTER_GATHER; @@ -1057,10 +1057,10 @@ } else { - Color4f c0; - Color4f c1; - Color4f c2; - Color4f c3; + Vector4f c0; + Vector4f c1; + Vector4f c2; + Vector4f c3; Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), (AddressingMode)state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1); Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), (AddressingMode)state.addressingModeV == ADDRESSING_WRAP, gather ? 
0 : -1); @@ -1075,35 +1075,35 @@ if(!gather) // Blend { // Fractions - Float4 fu = Fraction(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth))); - Float4 fv = Fraction(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight))); + Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth))); + Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight))); - if(componentCount >= 1) c0.r = c0.r + fu * (c1.r - c0.r); - if(componentCount >= 2) c0.g = c0.g + fu * (c1.g - c0.g); - if(componentCount >= 3) c0.b = c0.b + fu * (c1.b - c0.b); - if(componentCount >= 4) c0.a = c0.a + fu * (c1.a - c0.a); + if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x); + if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y); + if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z); + if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w); - if(componentCount >= 1) c2.r = c2.r + fu * (c3.r - c2.r); - if(componentCount >= 2) c2.g = c2.g + fu * (c3.g - c2.g); - if(componentCount >= 3) c2.b = c2.b + fu * (c3.b - c2.b); - if(componentCount >= 4) c2.a = c2.a + fu * (c3.a - c2.a); + if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x); + if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y); + if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z); + if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w); - if(componentCount >= 1) c.r = c0.r + fv * (c2.r - c0.r); - if(componentCount >= 2) c.g = c0.g + fv * (c2.g - c0.g); - if(componentCount >= 3) c.b = c0.b + fv * (c2.b - c0.b); - if(componentCount >= 4) c.a = c0.a + fv * (c2.a - c0.a); + if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x); + if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y); + if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z); + if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w); } else { - c.r = c1.r; - c.g = c2.r; - c.b = c3.r; - c.a = c0.r; + c.x = c1.x; + c.y = c2.x; 
+ c.z = c3.x; + c.w = c0.x; } } } - void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD) + void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD) { int componentCount = textureComponentCount(); @@ -1127,14 +1127,14 @@ } else { - Color4f &c0 = c; - Color4f c1; - Color4f c2; - Color4f c3; - Color4f c4; - Color4f c5; - Color4f c6; - Color4f c7; + Vector4f &c0 = c; + Vector4f c1; + Vector4f c2; + Vector4f c3; + Vector4f c4; + Vector4f c5; + Vector4f c6; + Vector4f c7; Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), (AddressingMode)state.addressingModeU == ADDRESSING_WRAP, -1); Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), (AddressingMode)state.addressingModeV == ADDRESSING_WRAP, -1); @@ -1153,51 +1153,51 @@ sampleTexel(c7, uuuu1, vvvv1, wwww1, w, mipmap, buffer); // Fractions - Float4 fu = Fraction(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth))); - Float4 fv = Fraction(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight))); - Float4 fw = Fraction(Float4(As<UShort4>(wwww0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fDepth))); + Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth))); + Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight))); + Float4 fw = Frac(Float4(As<UShort4>(wwww0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fDepth))); // Blend first slice - if(componentCount >= 1) c0.r = c0.r + fu * (c1.r - c0.r); - if(componentCount >= 2) c0.g = c0.g + fu * (c1.g - c0.g); - if(componentCount >= 3) c0.b = c0.b + fu * (c1.b - c0.b); - if(componentCount >= 4) c0.a = c0.a + fu * (c1.a - c0.a); + if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x); + if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y); + if(componentCount >= 3) c0.z = c0.z + fu 
* (c1.z - c0.z); + if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w); - if(componentCount >= 1) c2.r = c2.r + fu * (c3.r - c2.r); - if(componentCount >= 2) c2.g = c2.g + fu * (c3.g - c2.g); - if(componentCount >= 3) c2.b = c2.b + fu * (c3.b - c2.b); - if(componentCount >= 4) c2.a = c2.a + fu * (c3.a - c2.a); + if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x); + if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y); + if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z); + if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w); - if(componentCount >= 1) c0.r = c0.r + fv * (c2.r - c0.r); - if(componentCount >= 2) c0.g = c0.g + fv * (c2.g - c0.g); - if(componentCount >= 3) c0.b = c0.b + fv * (c2.b - c0.b); - if(componentCount >= 4) c0.a = c0.a + fv * (c2.a - c0.a); + if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x); + if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y); + if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z); + if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w); // Blend second slice - if(componentCount >= 1) c4.r = c4.r + fu * (c5.r - c4.r); - if(componentCount >= 2) c4.g = c4.g + fu * (c5.g - c4.g); - if(componentCount >= 3) c4.b = c4.b + fu * (c5.b - c4.b); - if(componentCount >= 4) c4.a = c4.a + fu * (c5.a - c4.a); + if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x); + if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y); + if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z); + if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w); - if(componentCount >= 1) c6.r = c6.r + fu * (c7.r - c6.r); - if(componentCount >= 2) c6.g = c6.g + fu * (c7.g - c6.g); - if(componentCount >= 3) c6.b = c6.b + fu * (c7.b - c6.b); - if(componentCount >= 4) c6.a = c6.a + fu * (c7.a - c6.a); + if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x); + if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y); + if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z); + if(componentCount >= 4) c6.w = c6.w + 
fu * (c7.w - c6.w); - if(componentCount >= 1) c4.r = c4.r + fv * (c6.r - c4.r); - if(componentCount >= 2) c4.g = c4.g + fv * (c6.g - c4.g); - if(componentCount >= 3) c4.b = c4.b + fv * (c6.b - c4.b); - if(componentCount >= 4) c4.a = c4.a + fv * (c6.a - c4.a); + if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x); + if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y); + if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z); + if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w); // Blend slices - if(componentCount >= 1) c0.r = c0.r + fw * (c4.r - c0.r); - if(componentCount >= 2) c0.g = c0.g + fw * (c4.g - c0.g); - if(componentCount >= 3) c0.b = c0.b + fw * (c4.b - c0.b); - if(componentCount >= 4) c0.a = c0.a + fw * (c4.a - c0.a); + if(componentCount >= 1) c0.x = c0.x + fw * (c4.x - c0.x); + if(componentCount >= 2) c0.y = c0.y + fw * (c4.y - c0.y); + if(componentCount >= 3) c0.z = c0.z + fw * (c4.z - c0.z); + if(componentCount >= 4) c0.w = c0.w + fw * (c4.w - c0.w); } } - void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Float &lodBias, Color4f &dsx, Color4f &dsy, bool bias, bool gradients, bool lodProvided) + void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Float &lodBias, Vector4f &dsx, Vector4f &dsy, bool bias, bool gradients, bool lodProvided) { if(!lodProvided) { @@ -1267,7 +1267,7 @@ lod = Min(lod, Float(MIPMAP_LEVELS - 2)); // Trilinear accesses lod+1 } - void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Float &lodBias, Color4f &dsx, Color4f &dsy, bool bias, bool gradients, bool lodProvided) + void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Float &lodBias, Vector4f &dsx, Vector4f &dsy, bool bias, bool gradients, bool lodProvided) { 
if(state.mipmapFilter == MIPMAP_NONE) { @@ -1337,9 +1337,9 @@ void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodU, Float4 &lodV, Float4 &x, Float4 &y, Float4 &z) { - Int4 xp = CmpNLE(x, Float4(0.0f, 0.0f, 0.0f, 0.0f)); // x > 0 - Int4 yp = CmpNLE(y, Float4(0.0f, 0.0f, 0.0f, 0.0f)); // y > 0 - Int4 zp = CmpNLE(z, Float4(0.0f, 0.0f, 0.0f, 0.0f)); // z > 0 + Int4 xp = CmpNLE(x, Float4(0.0f)); // x > 0 + Int4 yp = CmpNLE(y, Float4(0.0f)); // y > 0 + Int4 zp = CmpNLE(z, Float4(0.0f)); // z > 0 Float4 absX = Abs(x); Float4 absY = Abs(y); @@ -1383,8 +1383,8 @@ Float4 M = As<Float4>((xyz & As<Int4>(x)) | (yzx & As<Int4>(y)) | (zxy & As<Int4>(z))); M = reciprocal(M); - U *= M * Float4(0.5f, 0.5f, 0.5f, 0.5f); - V *= M * Float4(0.5f, 0.5f, 0.5f, 0.5f); + U *= M * Float4(0.5f); + V *= M * Float4(0.5f); // Project coordinates onto one face for consistent LOD calculation { @@ -1404,8 +1404,8 @@ Float4 M = As<Float4>((xyz & As<Int4>(x)) | (yzx & As<Int4>(y)) | (zxy & As<Int4>(z))); M = Rcp_pp(M); - lodU *= M * Float4(0.5f, 0.5f, 0.5f, 0.5f); - lodV *= M * Float4(0.5f, 0.5f, 0.5f, 0.5f); + lodU *= M * Float4(0.5f); + lodV *= M * Float4(0.5f); } } @@ -1467,7 +1467,7 @@ index[3] = Extract(As<Int2>(uuu2), 1); } - void SamplerCore::sampleTexel(Color4i &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]) + void SamplerCore::sampleTexel(Vector4i &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]) { Int index[4]; @@ -1493,32 +1493,32 @@ Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]); Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]); Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]); - c.r = UnpackLow(c0, c1); - c.g = UnpackLow(c2, c3); + c.x = UnpackLow(c0, c1); + c.y = UnpackLow(c2, c3); switch(state.textureFormat) { case FORMAT_A8R8G8B8: - c.b = c.r; - c.b = As<Short4>(UnpackLow(c.b, c.g)); - c.r = As<Short4>(UnpackHigh(c.r, c.g)); - c.g = c.b; - c.a = c.r; - c.b = 
UnpackLow(As<Byte8>(c.b), As<Byte8>(c.b)); - c.g = UnpackHigh(As<Byte8>(c.g), As<Byte8>(c.g)); - c.r = UnpackLow(As<Byte8>(c.r), As<Byte8>(c.r)); - c.a = UnpackHigh(As<Byte8>(c.a), As<Byte8>(c.a)); + c.z = c.x; + c.z = As<Short4>(UnpackLow(c.z, c.y)); + c.x = As<Short4>(UnpackHigh(c.x, c.y)); + c.y = c.z; + c.w = c.x; + c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); + c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); + c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); + c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); break; case FORMAT_Q8W8V8U8: - c.b = c.r; - c.r = As<Short4>(UnpackLow(c.r, c.g)); - c.b = As<Short4>(UnpackHigh(c.b, c.g)); - c.g = c.r; - c.a = c.b; - c.r = UnpackLow(As<Byte8>(c.r), As<Byte8>(c.r)); - c.g = UnpackHigh(As<Byte8>(c.g), As<Byte8>(c.g)); - c.b = UnpackLow(As<Byte8>(c.b), As<Byte8>(c.b)); - c.a = UnpackHigh(As<Byte8>(c.a), As<Byte8>(c.a)); + c.z = c.x; + c.x = As<Short4>(UnpackLow(c.x, c.y)); + c.z = As<Short4>(UnpackHigh(c.z, c.y)); + c.y = c.x; + c.w = c.z; + c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); + c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); + c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); + c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); break; default: ASSERT(false); @@ -1531,30 +1531,30 @@ Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]); Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]); Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]); - c.r = UnpackLow(c0, c1); - c.g = UnpackLow(c2, c3); + c.x = UnpackLow(c0, c1); + c.y = UnpackLow(c2, c3); switch(state.textureFormat) { case FORMAT_X8R8G8B8: - c.b = c.r; - c.b = As<Short4>(UnpackLow(c.b, c.g)); - c.r = As<Short4>(UnpackHigh(c.r, c.g)); - c.g = c.b; - c.b = UnpackLow(As<Byte8>(c.b), As<Byte8>(c.b)); - c.g = UnpackHigh(As<Byte8>(c.g), As<Byte8>(c.g)); - c.r = UnpackLow(As<Byte8>(c.r), As<Byte8>(c.r)); + c.z = c.x; + c.z = As<Short4>(UnpackLow(c.z, c.y)); + c.x = As<Short4>(UnpackHigh(c.x, c.y)); + c.y = c.z; + c.z = UnpackLow(As<Byte8>(c.z), 
As<Byte8>(c.z)); + c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); + c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); break; case FORMAT_X8L8V8U8: - c.b = c.r; - c.r = As<Short4>(UnpackLow(c.r, c.g)); - c.b = As<Short4>(UnpackHigh(c.b, c.g)); - c.g = c.r; - c.r = UnpackLow(As<Byte8>(c.r), As<Byte8>(Short4(0x0000, 0x0000, 0x0000, 0x0000))); - c.r = c.r << 8; - c.g = UnpackHigh(As<Byte8>(c.g), As<Byte8>(Short4(0x0000, 0x0000, 0x0000, 0x0000))); - c.g = c.g << 8; - c.b = UnpackLow(As<Byte8>(c.b), As<Byte8>(c.b)); + c.z = c.x; + c.x = As<Short4>(UnpackLow(c.x, c.y)); + c.z = As<Short4>(UnpackHigh(c.z, c.y)); + c.y = c.x; + c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0x0000, 0x0000, 0x0000, 0x0000))); + c.x = c.x << 8; + c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0x0000, 0x0000, 0x0000, 0x0000))); + c.y = c.y << 8; + c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); break; default: ASSERT(false); @@ -1562,10 +1562,10 @@ } break; case 2: - c.r = Insert(c.r, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); - c.r = Insert(c.r, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); - c.r = Insert(c.r, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); - c.r = Insert(c.r, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); + c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); + c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); + c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); + c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); switch(state.textureFormat) { @@ -1573,8 +1573,8 @@ case FORMAT_V8U8: case FORMAT_A8L8: // FIXME: Unpack properly to 0.16 format - c.g = c.r; - c.r = c.r << 8; + c.y = c.x; + c.x = c.x << 8; break; default: ASSERT(false); @@ -1586,8 +1586,7 @@ c2 = Int(*Pointer<Byte>(buffer[f2] + index[2])); c3 = Int(*Pointer<Byte>(buffer[f3] + index[3])); c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); - c.r = As<Short4>(Int2(c0)); - c.r = UnpackLow(As<Byte8>(c.r), As<Byte8>(c.r)); + c.x = 
Unpack(As<Byte4>(c0)); break; default: ASSERT(false); @@ -1598,26 +1597,26 @@ switch(textureComponentCount()) { case 4: - c.r = *Pointer<Short4>(buffer[f0] + 8 * index[0]); - c.g = *Pointer<Short4>(buffer[f1] + 8 * index[1]); - c.b = *Pointer<Short4>(buffer[f2] + 8 * index[2]); - c.a = *Pointer<Short4>(buffer[f3] + 8 * index[3]); - transpose4x4(c.r, c.g, c.b, c.a); + c.x = *Pointer<Short4>(buffer[f0] + 8 * index[0]); + c.y = *Pointer<Short4>(buffer[f1] + 8 * index[1]); + c.z = *Pointer<Short4>(buffer[f2] + 8 * index[2]); + c.w = *Pointer<Short4>(buffer[f3] + 8 * index[3]); + transpose4x4(c.x, c.y, c.z, c.w); break; case 2: - c.r = *Pointer<Short4>(buffer[f0] + 4 * index[0]); - c.r = As<Short4>(UnpackLow(c.r, *Pointer<Short4>(buffer[f1] + 4 * index[1]))); - c.b = *Pointer<Short4>(buffer[f2] + 4 * index[2]); - c.b = As<Short4>(UnpackLow(c.b, *Pointer<Short4>(buffer[f3] + 4 * index[3]))); - c.g = c.r; - c.r = As<Short4>(UnpackLow(As<Int2>(c.r), As<Int2>(c.b))); - c.g = As<Short4>(UnpackHigh(As<Int2>(c.g), As<Int2>(c.b))); + c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]); + c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1]))); + c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]); + c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3]))); + c.y = c.x; + c.x = As<Short4>(UnpackLow(As<Int2>(c.x), As<Int2>(c.z))); + c.y = As<Short4>(UnpackHigh(As<Int2>(c.y), As<Int2>(c.z))); break; case 1: - c.r = Insert(c.r, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); - c.r = Insert(c.r, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); - c.r = Insert(c.r, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); - c.r = Insert(c.r, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); + c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0); + c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1); + c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2); + c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3); 
break; default: ASSERT(false); @@ -1625,7 +1624,7 @@ } } - void SamplerCore::sampleTexel(Color4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]) + void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]) { Int index[4]; @@ -1640,34 +1639,34 @@ switch(textureComponentCount()) { case 4: - c.r = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); - c.g = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); - c.b = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); - c.a = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); - transpose4x4(c.r, c.g, c.b, c.a); + c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); + c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); + c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); + c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); + transpose4x4(c.x, c.y, c.z, c.w); break; case 2: // FIXME: Optimal shuffling? - c.r.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8); - c.r.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8); - c.b.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8); - c.b.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8); - c.g = c.r; - c.r = Float4(c.r.xz, c.b.xz); - c.g = Float4(c.g.yw, c.b.yw); + c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8); + c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8); + c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8); + c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8); + c.y = c.x; + c.x = Float4(c.x.xz, c.z.xz); + c.y = Float4(c.y.yw, c.z.yw); break; case 1: // FIXME: Optimal shuffling? 
- c.r.x = *Pointer<Float>(buffer[f0] + index[0] * 4); - c.r.y = *Pointer<Float>(buffer[f1] + index[1] * 4); - c.r.z = *Pointer<Float>(buffer[f2] + index[2] * 4); - c.r.w = *Pointer<Float>(buffer[f3] + index[3] * 4); + c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4); + c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4); + c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4); + c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4); if(state.textureFormat == FORMAT_D32F_SHADOW && state.textureFilter != FILTER_GATHER) { Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f)); - c.r = As<Float4>(As<Int4>(CmpNLT(c.r, d)) & As<Int4>(Float4(1.0f, 1.0f, 1.0f, 1.0f))); // FIXME: Only less-equal? + c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f))); // FIXME: Only less-equal? } break; default: @@ -1746,15 +1745,15 @@ void SamplerCore::convertFixed12(Short4 &ci, Float4 &cf) { - ci = RoundShort4(cf * Float4(0x1000, 0x1000, 0x1000, 0x1000)); + ci = RoundShort4(cf * Float4(0x1000)); } - void SamplerCore::convertFixed12(Color4i &ci, Color4f &cf) + void SamplerCore::convertFixed12(Vector4i &ci, Vector4f &cf) { - convertFixed12(ci.r, cf.r); - convertFixed12(ci.g, cf.g); - convertFixed12(ci.b, cf.b); - convertFixed12(ci.a, cf.a); + convertFixed12(ci.x, cf.x); + convertFixed12(ci.y, cf.y); + convertFixed12(ci.z, cf.z); + convertFixed12(ci.w, cf.w); } void SamplerCore::convertSigned12(Float4 &cf, Short4 &ci) @@ -1762,22 +1761,22 @@ cf = Float4(ci) * Float4(1.0f / 0x0FFE); } -// void SamplerCore::convertSigned12(Color4f &cf, Color4i &ci) +// void SamplerCore::convertSigned12(Vector4f &cf, Vector4i &ci) // { -// convertSigned12(cf.r, ci.r); -// convertSigned12(cf.g, ci.g); -// convertSigned12(cf.b, ci.b); -// convertSigned12(cf.a, ci.a); +// convertSigned12(cf.x, ci.x); +// convertSigned12(cf.y, ci.y); +// convertSigned12(cf.z, ci.z); +// convertSigned12(cf.w, ci.w); // } void SamplerCore::convertSigned15(Float4 &cf, Short4 &ci) { - cf = Float4(ci) * Float4(1.0f / 0x7FFF, 1.0f 
/ 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF); + cf = Float4(ci) * Float4(1.0f / 0x7FFF); } void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &ci) { - cf = Float4(As<UShort4>(ci)) * Float4(1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF); + cf = Float4(As<UShort4>(ci)) * Float4(1.0f / 0xFFFF); } void SamplerCore::sRGBtoLinear16_12(Short4 &c)
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp index c3c11e9..c03e30c 100644 --- a/src/Shader/SamplerCore.hpp +++ b/src/Shader/SamplerCore.hpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -22,34 +22,34 @@ public: SamplerCore(Pointer<Byte> &r, const Sampler::State &state); - void sampleTexture(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool bias = false, bool fixed12 = true, bool gradients = false, bool lodProvided = false); - void sampleTexture(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool bias = false, bool gradients = false, bool lodProvided = false); + void sampleTexture(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool bias = false, bool fixed12 = true, bool gradients = false, bool lodProvided = false); + void sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool bias = false, bool gradients = false, bool lodProvided = false); private: void border(Short4 &mask, Float4 &coordinates); void border(Int4 &mask, Float4 &coordinates); Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count); - void sampleFilter(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided); - void sampleAniso(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided); - void sampleQuad(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD); - void sampleQuad2D(Pointer<Byte> 
&texture, Color4i &c, Float4 &u, Float4 &v, Float &lod, Int face[4], bool secondLOD); - void sample3D(Pointer<Byte> &texture, Color4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD); - void sampleFloatFilter(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided); - void sampleFloatAniso(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided); - void sampleFloat(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD); - void sampleFloat2D(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &z, Float &lod, Int face[4], bool secondLOD); - void sampleFloat3D(Pointer<Byte> &texture, Color4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD); - void computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float &lodBias, Color4f &dsx, Color4f &dsy, bool bias, bool gradients, bool lodProvided); - void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float &lodBias, Color4f &dsx, Color4f &dsy, bool bias, bool gradients, bool lodProvided); + void sampleFilter(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided); + void sampleAniso(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided); + void sampleQuad(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD); + void sampleQuad2D(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float &lod, Int face[4], bool 
secondLOD); + void sample3D(Pointer<Byte> &texture, Vector4i &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD); + void sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool lodProvided); + void sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, bool lodProvided); + void sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD); + void sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &z, Float &lod, Int face[4], bool secondLOD); + void sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD); + void computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Float &lodBias, Vector4f &dsx, Vector4f &dsy, bool bias, bool gradients, bool lodProvided); + void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float &lodBias, Vector4f &dsx, Vector4f &dsy, bool bias, bool gradients, bool lodProvided); void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodU, Float4 &lodV, Float4 &x, Float4 &y, Float4 &z); void computeIndices(Int index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, const Pointer<Byte> &mipmap); - void sampleTexel(Color4i &c, Short4 &u, Short4 &v, Short4 &s, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]); - void sampleTexel(Color4f &c, Short4 &u, Short4 &v, Short4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]); + void sampleTexel(Vector4i &c, Short4 &u, Short4 &v, Short4 &s, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]); + void sampleTexel(Vector4f &c, Short4 &u, Short4 &v, Short4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]); 
void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD); void address(Short4 &uuuu, Float4 &uw, AddressingMode addressingMode); void convertFixed12(Short4 &ci, Float4 &cf); - void convertFixed12(Color4i &ci, Color4f &cf); + void convertFixed12(Vector4i &ci, Vector4f &cf); void convertSigned12(Float4 &cf, Short4 &ci); void convertSigned15(Float4 &cf, Short4 &ci); void convertUnsigned16(Float4 &cf, Short4 &ci);
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp index 40a2a18..d77a21e 100644 --- a/src/Shader/SetupRoutine.cpp +++ b/src/Shader/SetupRoutine.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -15,7 +15,7 @@ #include "Renderer/Primitive.hpp" #include "Renderer/Polygon.hpp" #include "Renderer/Renderer.hpp" -#include "Reactor/Shell.hpp" +#include "Reactor/Reactor.hpp" namespace sw { @@ -108,10 +108,16 @@ if(state.twoSidedStencil) { - Byte8 clockwiseMask = IfThenElse(A > Float(0.0f), Byte8(0xFFFFFFFFFFFFFFFF), Byte8(0x0000000000000000)); - - *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = clockwiseMask; - *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = ~clockwiseMask; + If(A > Float(0.0f)) + { + *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFFFFFFFFFFFFFFFF); + *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x0000000000000000); + } + Else + { + *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0x0000000000000000); + *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0xFFFFFFFFFFFFFFFF); + } } if(state.vFace) @@ -145,8 +151,8 @@ Float w = v.w; Float rhw = IfThenElse(w != Float(0.0f), Float(1.0f) / w, Float(1.0f)); - X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,LLLLx16)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,WWWWx16))); - Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,TTTTx16)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,HHHHx16))); + X[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float>(data + OFFSET(DrawData,Wx16))); + Y[i] = RoundInt(*Pointer<Float>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float>(data + OFFSET(DrawData,Hx16))); i++; } @@ -161,8 +167,8 @@ Do { - yMin = IfThenElse(Y[i] < yMin, Int(Y[i]), yMin); // FIXME: Min(Y[i], 
yMin) - yMax = IfThenElse(Y[i] > yMax, Int(Y[i]), yMax); // FIXME: Max(Y[i], yMax) + yMin = Min(Y[i], yMin); + yMax = Max(Y[i], yMax); i++; } @@ -183,6 +189,9 @@ { Return(false); } + + yMin = Max(yMin, *Pointer<Int>(data + OFFSET(DrawData,scissorY0))); + yMax = Min(yMax, *Pointer<Int>(data + OFFSET(DrawData,scissorY1))); For(Int q = 0, q < state.multiSample, q++) { @@ -229,7 +238,7 @@ Do { - edge(primitive, Int(Xq[i + 1 - d]), Int(Yq[i + 1 - d]), Int(Xq[i + d]), Int(Yq[i + d]), q); + edge(primitive, data, Int(Xq[i + 1 - d]), Int(Yq[i + 1 - d]), Int(Xq[i + d]), Int(Yq[i + d]), q); i++; } @@ -372,7 +381,7 @@ // M[2].w = 0; } - if(state.perspective) + if(state.interpolateW) { Float4 ABC = M[0] + M[1] + M[2]; @@ -385,7 +394,7 @@ *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) = C; } - if(state.interpolateDepth) + if(state.interpolateZ) { Float z0 = *Pointer<Float>(v0 + OFFSET(Vertex,Z)); Float z1 = *Pointer<Float>(v1 + OFFSET(Vertex,Z)); @@ -491,7 +500,7 @@ if(component == 3) i.y = Float(1.0f); if(component == 0) i.z = Float(0.5f); - if(component == 1) i.z = Float(0.0f); + if(component == 1) i.z = Float(1.0f); if(component == 2) i.z = Float(0.0f); if(component == 3) i.z = Float(1.0f); @@ -518,17 +527,9 @@ i *= w012; } - Float4 A; - Float4 B; - Float4 C; - - A = i.xxxx; - B = i.yyyy; - C = i.zzzz; - - A *= m[0]; - B *= m[1]; - C *= m[2]; + Float4 A = i.xxxx * m[0]; + Float4 B = i.yyyy * m[1]; + Float4 C = i.zzzz * m[2]; C = A + B + C; @@ -550,10 +551,13 @@ } } - void SetupRoutine::edge(Pointer<Byte> &primitive, Int &X1, Int &Y1, Int &X2, Int &Y2, Int &q) + void SetupRoutine::edge(Pointer<Byte> &primitive, Pointer<Byte> &data, Int &X1, Int &Y1, Int &X2, Int &Y2, Int &q) { If(Y1 != Y2) { + Int xMin = *Pointer<Int>(data + OFFSET(DrawData,scissorX0)); + Int xMax = *Pointer<Int>(data + OFFSET(DrawData,scissorX1)); + Bool swap = Y2 < Y1; Pointer<Byte> leftEdge = primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left); @@ -567,10 +571,10 @@ Y1 = 
IfThenElse(swap, Y2, Y1); Y2 = IfThenElse(swap, Y0, Y2); - Int y1 = (Y1 + 0x0000000F) >> 4; - Int y2 = (Y2 + 0x0000000F) >> 4; + Int y1 = Max((Y1 + 0x0000000F) >> 4, *Pointer<Int>(data + OFFSET(DrawData,scissorY0))); + Int y2 = Min((Y2 + 0x0000000F) >> 4, *Pointer<Int>(data + OFFSET(DrawData,scissorY1))); - If(y1 != y2) + If(y1 < y2) { // Deltas Int DX12 = X2 - X1; @@ -579,7 +583,7 @@ Int FDX12 = DX12 << 4; Int FDY12 = DY12 << 4; - Int X = DX12 * (-Y1 & 0xF) + X1 * DY12; + Int X = DX12 * ((y1 << 4) - Y1) + X1 * DY12; Int x = X / FDY12; // Edge Int d = X % FDY12; // Error-term Int ceil = -d >> 31; // Ceiling division: remainder <= 0 @@ -597,7 +601,7 @@ Do { - *Pointer<Short>(edge + y * sizeof(Primitive::Span)) = Short(x); + *Pointer<Short>(edge + y * sizeof(Primitive::Span)) = Short(Clamp(x, xMin, xMax)); x += Q; d += R;
diff --git a/src/Shader/SetupRoutine.hpp b/src/Shader/SetupRoutine.hpp index cf40281..dd280c7 100644 --- a/src/Shader/SetupRoutine.hpp +++ b/src/Shader/SetupRoutine.hpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -31,7 +31,7 @@ private: void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, bool wrap, int component); - void edge(Pointer<Byte> &primitive, Int &X1, Int &Y1, Int &X2, Int &Y2, Int &q); + void edge(Pointer<Byte> &primitive, Pointer<Byte> &data, Int &X1, Int &Y1, Int &X2, Int &Y2, Int &q); void conditionalRotate1(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2); void conditionalRotate2(Bool condition, Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2);
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp index 43e8bdc..fcbc839 100644 --- a/src/Shader/Shader.cpp +++ b/src/Shader/Shader.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -11,42 +11,137 @@ #include "Shader.hpp" +#include "VertexShader.hpp" +#include "PixelShader.hpp" #include "Math.hpp" #include "Debug.hpp" -#include <stdarg.h> #include <fstream> #include <sstream> namespace sw { - Shader::Instruction::Instruction() + volatile int Shader::serialCounter = 1; + + Shader::Opcode Shader::OPCODE_DP(int i) { - operation.opcode = Operation::OPCODE_NOP; - destinationParameter.type = Parameter::PARAMETER_VOID; - sourceParameter[0].type = Parameter::PARAMETER_VOID; - sourceParameter[1].type = Parameter::PARAMETER_VOID; - sourceParameter[2].type = Parameter::PARAMETER_VOID; - sourceParameter[3].type = Parameter::PARAMETER_VOID; + switch(i) + { + default: ASSERT(false); + case 1: return OPCODE_DP1; + case 2: return OPCODE_DP2; + case 3: return OPCODE_DP3; + case 4: return OPCODE_DP4; + } } - Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) + Shader::Opcode Shader::OPCODE_LEN(int i) + { + switch(i) + { + default: ASSERT(false); + case 1: return OPCODE_ABS; + case 2: return OPCODE_LEN2; + case 3: return OPCODE_LEN3; + case 4: return OPCODE_LEN4; + } + } + + Shader::Opcode Shader::OPCODE_DIST(int i) + { + switch(i) + { + default: ASSERT(false); + case 1: return OPCODE_DIST1; + case 2: return OPCODE_DIST2; + case 3: return OPCODE_DIST3; + case 4: return OPCODE_DIST4; + } + } + + Shader::Opcode Shader::OPCODE_NRM(int i) + { + switch(i) + { + default: ASSERT(false); + case 1: return OPCODE_SGN; + case 2: return OPCODE_NRM2; + case 3: return OPCODE_NRM3; + case 4: return OPCODE_NRM4; + } + } + + Shader::Opcode Shader::OPCODE_FORWARD(int i) + { + switch(i) + { + default: ASSERT(false); + 
case 1: return OPCODE_FORWARD1; + case 2: return OPCODE_FORWARD2; + case 3: return OPCODE_FORWARD3; + case 4: return OPCODE_FORWARD4; + } + } + + Shader::Opcode Shader::OPCODE_REFLECT(int i) + { + switch(i) + { + default: ASSERT(false); + case 1: return OPCODE_REFLECT1; + case 2: return OPCODE_REFLECT2; + case 3: return OPCODE_REFLECT3; + case 4: return OPCODE_REFLECT4; + } + } + + Shader::Opcode Shader::OPCODE_REFRACT(int i) + { + switch(i) + { + default: ASSERT(false); + case 1: return OPCODE_REFRACT1; + case 2: return OPCODE_REFRACT2; + case 3: return OPCODE_REFRACT3; + case 4: return OPCODE_REFRACT4; + } + } + + Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0) + { + control = CONTROL_RESERVED0; + + predicate = false; + predicateNot = false; + predicateSwizzle = 0xE4; + + coissue = false; + samplerType = SAMPLER_UNKNOWN; + usage = USAGE_POSITION; + usageIndex = 0; + } + + Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0) { parseOperationToken(*token++, majorVersion); - if(operation.opcode == Operation::OPCODE_IF || - operation.opcode == Operation::OPCODE_IFC || - operation.opcode == Operation::OPCODE_LOOP || - operation.opcode == Operation::OPCODE_REP || - operation.opcode == Operation::OPCODE_BREAKC || - operation.opcode == Operation::OPCODE_BREAKP) // No destination operand + samplerType = SAMPLER_UNKNOWN; + usage = USAGE_POSITION; + usageIndex = 0; + + if(opcode == OPCODE_IF || + opcode == OPCODE_IFC || + opcode == OPCODE_LOOP || + opcode == OPCODE_REP || + opcode == OPCODE_BREAKC || + opcode == OPCODE_BREAKP) // No destination operand { if(size > 0) parseSourceToken(0, token++, majorVersion); if(size > 1) parseSourceToken(1, token++, majorVersion); if(size > 2) parseSourceToken(2, token++, majorVersion); if(size > 3) ASSERT(false); } - else if(operation.opcode == Operation::OPCODE_DCL) + else if(opcode == OPCODE_DCL) { parseDeclarationToken(*token++); 
parseDestinationToken(token++, majorVersion); @@ -57,7 +152,7 @@ { parseDestinationToken(token, majorVersion); - if(destinationParameter.relative && majorVersion >= 3) + if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3) { token++; size--; @@ -67,12 +162,12 @@ size--; } - if(operation.predicate) + if(predicate) { ASSERT(size != 0); - operation.predicateNot = (SourceParameter::Modifier)((*token & 0x0F000000) >> 24) == SourceParameter::MODIFIER_NOT; - operation.predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); + predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT; + predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); token++; size--; @@ -85,7 +180,7 @@ token++; size--; - if(sourceParameter[i].relative && majorVersion >= 2) + if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2) { token++; size--; @@ -98,105 +193,40 @@ { } - Shader::Instruction::Operation::Opcode Shader::Instruction::getOpcode() const - { - return operation.opcode; - } - - const Shader::Instruction::DestinationParameter &Shader::Instruction::getDestinationParameter() const - { - return destinationParameter; - } - - const Shader::Instruction::SourceParameter &Shader::Instruction::getSourceParameter(int i) const - { - return sourceParameter[i]; - } - - bool Shader::Instruction::isCoissue() const - { - return operation.coissue; - } - - bool Shader::Instruction::isProject() const - { - return operation.project; - } - - bool Shader::Instruction::isBias() const - { - return operation.bias; - } - - bool Shader::Instruction::isPredicate() const - { - return operation.predicate; - } - - bool Shader::Instruction::isPredicateNot() const - { - return operation.predicateNot; - } - - unsigned char Shader::Instruction::getPredicateSwizzle() const - { - return operation.predicateSwizzle; - } - - Shader::Instruction::Operation::Control Shader::Instruction::getControl() const - { - return operation.control; - } - - Shader::Instruction::Operation::Usage 
Shader::Instruction::getUsage() const - { - return operation.usage; - } - - unsigned char Shader::Instruction::getUsageIndex() const - { - return operation.usageIndex; - } - - Shader::Instruction::Operation::SamplerType Shader::Instruction::getSamplerType() const - { - return operation.samplerType; - } - std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const { std::string instructionString; - if(operation.opcode != Operation::OPCODE_DCL) + if(opcode != OPCODE_DCL) { - instructionString += operation.coissue ? "+ " : ""; + instructionString += coissue ? "+ " : ""; - if(operation.predicate) + if(predicate) { - instructionString += operation.predicateNot ? "(!p0" : "(p0"; - instructionString += swizzleString(Parameter::PARAMETER_PREDICATE, operation.predicateSwizzle); + instructionString += predicateNot ? "(!p0" : "(p0"; + instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle); instructionString += ") "; } - instructionString += operation.string(version) + operation.controlString() + destinationParameter.shiftString() + destinationParameter.modifierString(); + instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString(); - if(destinationParameter.type != Parameter::PARAMETER_VOID) + if(dst.type != PARAMETER_VOID) { - instructionString += " " + destinationParameter.string(shaderType, version) + - destinationParameter.relativeString() + - destinationParameter.maskString(); + instructionString += " " + dst.string(shaderType, version) + + dst.relativeString() + + dst.maskString(); } for(int i = 0; i < 4; i++) { - if(sourceParameter[i].type != Parameter::PARAMETER_VOID) + if(src[i].type != PARAMETER_VOID) { - instructionString += (destinationParameter.type != Parameter::PARAMETER_VOID || i > 0) ? 
", " : " "; - instructionString += sourceParameter[i].preModifierString() + - sourceParameter[i].string(shaderType, version) + - sourceParameter[i].relativeString() + - sourceParameter[i].postModifierString() + - sourceParameter[i].swizzleString(); + instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " "; + instructionString += src[i].preModifierString() + + src[i].string(shaderType, version) + + src[i].relativeString() + + src[i].postModifierString() + + src[i].swizzleString(); } } } @@ -204,68 +234,68 @@ { instructionString += "dcl"; - if(destinationParameter.type == Parameter::PARAMETER_SAMPLER) + if(dst.type == PARAMETER_SAMPLER) { - switch(operation.samplerType) + switch(samplerType) { - case Operation::SAMPLER_UNKNOWN: instructionString += " "; break; - case Operation::SAMPLER_1D: instructionString += "_1d "; break; - case Operation::SAMPLER_2D: instructionString += "_2d "; break; - case Operation::SAMPLER_CUBE: instructionString += "_cube "; break; - case Operation::SAMPLER_VOLUME: instructionString += "_volume "; break; + case SAMPLER_UNKNOWN: instructionString += " "; break; + case SAMPLER_1D: instructionString += "_1d "; break; + case SAMPLER_2D: instructionString += "_2d "; break; + case SAMPLER_CUBE: instructionString += "_cube "; break; + case SAMPLER_VOLUME: instructionString += "_volume "; break; default: ASSERT(false); } - instructionString += destinationParameter.string(shaderType, version); + instructionString += dst.string(shaderType, version); } - else if(destinationParameter.type == Parameter::PARAMETER_INPUT || - destinationParameter.type == Parameter::PARAMETER_OUTPUT || - destinationParameter.type == Parameter::PARAMETER_TEXTURE) + else if(dst.type == PARAMETER_INPUT || + dst.type == PARAMETER_OUTPUT || + dst.type == PARAMETER_TEXTURE) { if(version >= 0x0300) { - switch(operation.usage) + switch(usage) { - case Operation::USAGE_POSITION: instructionString += "_position"; break; - case Operation::USAGE_BLENDWEIGHT: 
instructionString += "_blendweight"; break; - case Operation::USAGE_BLENDINDICES: instructionString += "_blendindices"; break; - case Operation::USAGE_NORMAL: instructionString += "_normal"; break; - case Operation::USAGE_PSIZE: instructionString += "_psize"; break; - case Operation::USAGE_TEXCOORD: instructionString += "_texcoord"; break; - case Operation::USAGE_TANGENT: instructionString += "_tangent"; break; - case Operation::USAGE_BINORMAL: instructionString += "_binormal"; break; - case Operation::USAGE_TESSFACTOR: instructionString += "_tessfactor"; break; - case Operation::USAGE_POSITIONT: instructionString += "_positiont"; break; - case Operation::USAGE_COLOR: instructionString += "_color"; break; - case Operation::USAGE_FOG: instructionString += "_fog"; break; - case Operation::USAGE_DEPTH: instructionString += "_depth"; break; - case Operation::USAGE_SAMPLE: instructionString += "_sample"; break; + case USAGE_POSITION: instructionString += "_position"; break; + case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break; + case USAGE_BLENDINDICES: instructionString += "_blendindices"; break; + case USAGE_NORMAL: instructionString += "_normal"; break; + case USAGE_PSIZE: instructionString += "_psize"; break; + case USAGE_TEXCOORD: instructionString += "_texcoord"; break; + case USAGE_TANGENT: instructionString += "_tangent"; break; + case USAGE_BINORMAL: instructionString += "_binormal"; break; + case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break; + case USAGE_POSITIONT: instructionString += "_positiont"; break; + case USAGE_COLOR: instructionString += "_color"; break; + case USAGE_FOG: instructionString += "_fog"; break; + case USAGE_DEPTH: instructionString += "_depth"; break; + case USAGE_SAMPLE: instructionString += "_sample"; break; default: ASSERT(false); } - if(operation.usageIndex > 0) + if(usageIndex > 0) { std::ostringstream buffer; - buffer << (int)operation.usageIndex; + buffer << (int)usageIndex; instructionString += 
buffer.str(); } } - else ASSERT(destinationParameter.type != Parameter::PARAMETER_OUTPUT); + else ASSERT(dst.type != PARAMETER_OUTPUT); instructionString += " "; - instructionString += destinationParameter.string(shaderType, version); - instructionString += destinationParameter.maskString(); + instructionString += dst.string(shaderType, version); + instructionString += dst.maskString(); } - else if(destinationParameter.type == Parameter::PARAMETER_MISCTYPE) // vPos and vFace + else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace { instructionString += " "; - instructionString += destinationParameter.string(shaderType, version); + instructionString += dst.string(shaderType, version); } else ASSERT(false); } @@ -273,145 +303,7 @@ return instructionString; } - std::string Shader::Instruction::Operation::string(unsigned short version) const - { - switch(opcode) - { - case OPCODE_NOP: return "nop"; - case OPCODE_MOV: return "mov"; - case OPCODE_ADD: return "add"; - case OPCODE_SUB: return "sub"; - case OPCODE_MAD: return "mad"; - case OPCODE_MUL: return "mul"; - case OPCODE_RCP: return "rcp"; - case OPCODE_RSQ: return "rsq"; - case OPCODE_DP3: return "dp3"; - case OPCODE_DP4: return "dp4"; - case OPCODE_MIN: return "min"; - case OPCODE_MAX: return "max"; - case OPCODE_SLT: return "slt"; - case OPCODE_SGE: return "sge"; - case OPCODE_EXP: return "exp"; - case OPCODE_LOG: return "log"; - case OPCODE_LIT: return "lit"; - case OPCODE_DST: return "dst"; - case OPCODE_LRP: return "lrp"; - case OPCODE_FRC: return "frc"; - case OPCODE_M4X4: return "m4x4"; - case OPCODE_M4X3: return "m4x3"; - case OPCODE_M3X4: return "m3x4"; - case OPCODE_M3X3: return "m3x3"; - case OPCODE_M3X2: return "m3x2"; - case OPCODE_CALL: return "call"; - case OPCODE_CALLNZ: return "callnz"; - case OPCODE_LOOP: return "loop"; - case OPCODE_RET: return "ret"; - case OPCODE_ENDLOOP: return "endloop"; - case OPCODE_LABEL: return "label"; - case OPCODE_DCL: return "dcl"; - case OPCODE_POW: return "pow"; 
- case OPCODE_CRS: return "crs"; - case OPCODE_SGN: return "sgn"; - case OPCODE_ABS: return "abs"; - case OPCODE_NRM: return "nrm"; - case OPCODE_SINCOS: return "sincos"; - case OPCODE_REP: return "rep"; - case OPCODE_ENDREP: return "endrep"; - case OPCODE_IF: return "if"; - case OPCODE_IFC: return "ifc"; - case OPCODE_ELSE: return "else"; - case OPCODE_ENDIF: return "endif"; - case OPCODE_BREAK: return "break"; - case OPCODE_BREAKC: return "breakc"; - case OPCODE_MOVA: return "mova"; - case OPCODE_DEFB: return "defb"; - case OPCODE_DEFI: return "defi"; - case OPCODE_TEXCOORD: return "texcoord"; - case OPCODE_TEXKILL: return "texkill"; - case OPCODE_TEX: - if(version < 0x0104) return "tex"; - else return "texld"; - case OPCODE_TEXBEM: return "texbem"; - case OPCODE_TEXBEML: return "texbeml"; - case OPCODE_TEXREG2AR: return "texreg2ar"; - case OPCODE_TEXREG2GB: return "texreg2gb"; - case OPCODE_TEXM3X2PAD: return "texm3x2pad"; - case OPCODE_TEXM3X2TEX: return "texm3x2tex"; - case OPCODE_TEXM3X3PAD: return "texm3x3pad"; - case OPCODE_TEXM3X3TEX: return "texm3x3tex"; - case OPCODE_RESERVED0: return "reserved0"; - case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; - case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; - case OPCODE_EXPP: return "expp"; - case OPCODE_LOGP: return "logp"; - case OPCODE_CND: return "cnd"; - case OPCODE_DEF: return "def"; - case OPCODE_TEXREG2RGB: return "texreg2rgb"; - case OPCODE_TEXDP3TEX: return "texdp3tex"; - case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; - case OPCODE_TEXDP3: return "texdp3"; - case OPCODE_TEXM3X3: return "texm3x3"; - case OPCODE_TEXDEPTH: return "texdepth"; - case OPCODE_CMP: return "cmp"; - case OPCODE_BEM: return "bem"; - case OPCODE_DP2ADD: return "dp2add"; - case OPCODE_DSX: return "dsx"; - case OPCODE_DSY: return "dsy"; - case OPCODE_TEXLDD: return "texldd"; - case OPCODE_SETP: return "setp"; - case OPCODE_TEXLDL: return "texldl"; - case OPCODE_BREAKP: return "breakp"; - case OPCODE_PHASE: return "phase"; - case 
OPCODE_COMMENT: return "comment"; - case OPCODE_END: return "end"; - case OPCODE_PS_1_0: return "ps_1_0"; - case OPCODE_PS_1_1: return "ps_1_1"; - case OPCODE_PS_1_2: return "ps_1_2"; - case OPCODE_PS_1_3: return "ps_1_3"; - case OPCODE_PS_1_4: return "ps_1_4"; - case OPCODE_PS_2_0: return "ps_2_0"; - case OPCODE_PS_2_x: return "ps_2_x"; - case OPCODE_PS_3_0: return "ps_3_0"; - case OPCODE_VS_1_0: return "vs_1_0"; - case OPCODE_VS_1_1: return "vs_1_1"; - case OPCODE_VS_2_0: return "vs_2_0"; - case OPCODE_VS_2_x: return "vs_2_x"; - case OPCODE_VS_2_sw: return "vs_2_sw"; - case OPCODE_VS_3_0: return "vs_3_0"; - case OPCODE_VS_3_sw: return "vs_3_sw"; - default: - ASSERT(false); - } - - return "<unknown>"; - } - - std::string Shader::Instruction::Operation::controlString() const - { - if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_SETP) - { - if(project) return "p"; - - if(bias) return "b"; - - // FIXME: LOD - } - - switch(control) - { - case 1: return "_gt"; - case 2: return "_eq"; - case 3: return "_ge"; - case 4: return "_lt"; - case 5: return "_ne"; - case 6: return "_le"; - default: - return ""; - // ASSERT(false); // FIXME - } - } - - std::string Shader::Instruction::DestinationParameter::modifierString() const + std::string Shader::DestinationParameter::modifierString() const { if(type == PARAMETER_VOID || type == PARAMETER_LABEL) { @@ -420,6 +312,11 @@ std::string modifierString; + if(integer) + { + modifierString += "_int"; + } + if(saturate) { modifierString += "_sat"; @@ -438,7 +335,7 @@ return modifierString; } - std::string Shader::Instruction::DestinationParameter::shiftString() const + std::string Shader::DestinationParameter::shiftString() const { if(type == PARAMETER_VOID || type == PARAMETER_LABEL) { @@ -460,7 +357,7 @@ } } - std::string Shader::Instruction::DestinationParameter::maskString() const + std::string Shader::DestinationParameter::maskString() const { if(type == PARAMETER_VOID || type == 
PARAMETER_LABEL) { @@ -492,7 +389,7 @@ return ""; } - std::string Shader::Instruction::SourceParameter::preModifierString() const + std::string Shader::SourceParameter::preModifierString() const { if(type == PARAMETER_VOID) { @@ -522,30 +419,48 @@ return ""; } - std::string Shader::Instruction::Parameter::relativeString() const + std::string Shader::Parameter::relativeString() const { - if(!relative) return ""; - - if(relativeType == Parameter::PARAMETER_ADDR) + if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP) { - switch(relativeSwizzle & 0x03) + if(rel.type == PARAMETER_VOID) { - case 0: return "[a0.x]"; - case 1: return "[a0.y]"; - case 2: return "[a0.z]"; - case 3: return "[a0.w]"; + return ""; } + else if(rel.type == PARAMETER_ADDR) + { + switch(rel.swizzle & 0x03) + { + case 0: return "[a0.x]"; + case 1: return "[a0.y]"; + case 2: return "[a0.z]"; + case 3: return "[a0.w]"; + } + } + else if(rel.type == PARAMETER_TEMP) + { + std::ostringstream buffer; + buffer << rel.index; + + switch(rel.swizzle & 0x03) + { + case 0: return "[r" + buffer.str() + ".x]"; + case 1: return "[r" + buffer.str() + ".y]"; + case 2: return "[r" + buffer.str() + ".z]"; + case 3: return "[r" + buffer.str() + ".w]"; + } + } + else if(rel.type == PARAMETER_LOOP) + { + return "[aL]"; + } + else ASSERT(false); } - else if(relativeType == Parameter::PARAMETER_LOOP) - { - return "[aL]"; - } - else ASSERT(false); return ""; } - std::string Shader::Instruction::SourceParameter::postModifierString() const + std::string Shader::SourceParameter::postModifierString() const { if(type == PARAMETER_VOID) { @@ -575,7 +490,7 @@ return ""; } - std::string Shader::Instruction::SourceParameter::swizzleString() const + std::string Shader::SourceParameter::swizzleString() const { return Instruction::swizzleString(type, swizzle); } @@ -584,19 +499,21 @@ { if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token { 
- operation.opcode = (Operation::Opcode)token; - operation.predicate = false; - operation.coissue = false; + opcode = (Opcode)token; + + control = CONTROL_RESERVED0; + predicate = false; + coissue = false; } else { - operation.opcode = (Operation::Opcode)(token & 0x0000FFFF); - operation.control = (Operation::Control)((token & 0x00FF0000) >> 16); + opcode = (Opcode)(token & 0x0000FFFF); + control = (Control)((token & 0x00FF0000) >> 16); int size = (token & 0x0F000000) >> 24; - operation.predicate = (token & 0x10000000) != 0x00000000; - operation.coissue = (token & 0x40000000) != 0x00000000; + predicate = (token & 0x10000000) != 0x00000000; + coissue = (token & 0x40000000) != 0x00000000; if(majorVersion < 2) { @@ -608,7 +525,7 @@ if(majorVersion < 2) { - if(operation.predicate) + if(predicate) { ASSERT(false); } @@ -621,7 +538,7 @@ if(majorVersion >= 2) { - if(operation.coissue) + if(coissue) { ASSERT(false); // Reserved } @@ -636,43 +553,44 @@ void Shader::Instruction::parseDeclarationToken(unsigned long token) { - operation.samplerType = (Operation::SamplerType)((token & 0x78000000) >> 27); - operation.usage = (Operation::Usage)(token & 0x0000001F); - operation.usageIndex = (unsigned char)((token & 0x000F0000) >> 16); + samplerType = (SamplerType)((token & 0x78000000) >> 27); + usage = (Usage)(token & 0x0000001F); + usageIndex = (unsigned char)((token & 0x000F0000) >> 16); } void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion) { - destinationParameter.index = (unsigned short)(token[0] & 0x000007FF); - destinationParameter.type = (Parameter::Type)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); + dst.index = (unsigned short)(token[0] & 0x000007FF); + dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); // TODO: Check type and index range - destinationParameter.relative = (token[0] & 0x00002000) != 0x00000000; - destinationParameter.relativeType = 
Parameter::PARAMETER_ADDR; - destinationParameter.relativeSwizzle = 0x00; + bool relative = (token[0] & 0x00002000) != 0x00000000; + dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; + dst.rel.swizzle = 0x00; + dst.rel.scale = 1; - if(destinationParameter.relative && majorVersion >= 3) + if(relative && majorVersion >= 3) { - destinationParameter.relativeType = (Parameter::Type)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); - destinationParameter.relativeSwizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); + dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); + dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); } - else if(destinationParameter.relative) ASSERT(false); // Reserved + else if(relative) ASSERT(false); // Reserved if((token[0] & 0x0000C000) != 0x00000000) { ASSERT(false); // Reserved } - destinationParameter.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); - destinationParameter.saturate = (token[0] & 0x00100000) != 0; - destinationParameter.partialPrecision = (token[0] & 0x00200000) != 0; - destinationParameter.centroid = (token[0] & 0x00400000) != 0; - destinationParameter.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; + dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); + dst.saturate = (token[0] & 0x00100000) != 0; + dst.partialPrecision = (token[0] & 0x00200000) != 0; + dst.centroid = (token[0] & 0x00400000) != 0; + dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; if(majorVersion >= 2) { - if(destinationParameter.shift) + if(dst.shift) { ASSERT(false); // Reserved } @@ -687,67 +605,71 @@ void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion) { // Defaults - sourceParameter[i].value = (float&)*token; - sourceParameter[i].type = Parameter::PARAMETER_VOID; - sourceParameter[i].modifier = SourceParameter::MODIFIER_NONE; - sourceParameter[i].swizzle = 0xE4; - 
sourceParameter[i].relative = false; - sourceParameter[i].relativeType = Parameter::PARAMETER_ADDR; - sourceParameter[i].relativeSwizzle = 0x00; + src[i].index = 0; + src[i].type = PARAMETER_VOID; + src[i].modifier = MODIFIER_NONE; + src[i].swizzle = 0xE4; + src[i].rel.type = PARAMETER_VOID; + src[i].rel.swizzle = 0x00; + src[i].rel.scale = 1; - switch(operation.opcode) + switch(opcode) { - case Instruction::Operation::OPCODE_DEF: - sourceParameter[i].type = Parameter::PARAMETER_FLOATLITERAL; + case OPCODE_DEF: + src[0].type = PARAMETER_FLOAT4LITERAL; + src[0].value[i] = *(float*)token; break; - case Instruction::Operation::OPCODE_DEFB: - sourceParameter[i].type = Parameter::PARAMETER_BOOLLITERAL; + case OPCODE_DEFB: + src[0].type = PARAMETER_BOOL1LITERAL; + src[0].boolean[0] = *(int*)token; break; - case Instruction::Operation::OPCODE_DEFI: - sourceParameter[i].type = Parameter::PARAMETER_INTLITERAL; + case OPCODE_DEFI: + src[0].type = PARAMETER_INT4LITERAL; + src[0].integer[i] = *(int*)token; break; default: - sourceParameter[i].index = (unsigned short)(token[0] & 0x000007FF); - sourceParameter[i].type = (Parameter::Type)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); + src[i].index = (unsigned short)(token[0] & 0x000007FF); + src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); // FIXME: Check type and index range - sourceParameter[i].relative = (token[0] & 0x00002000) != 0x00000000; + bool relative = (token[0] & 0x00002000) != 0x00000000; + src[i].rel.type = relative ? 
PARAMETER_ADDR : PARAMETER_VOID; if((token[0] & 0x0000C000) != 0x00000000) { - if(operation.opcode != Operation::OPCODE_DEF && - operation.opcode != Operation::OPCODE_DEFI && - operation.opcode != Operation::OPCODE_DEFB) + if(opcode != OPCODE_DEF && + opcode != OPCODE_DEFI && + opcode != OPCODE_DEFB) { ASSERT(false); } } - sourceParameter[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); - sourceParameter[i].modifier = (SourceParameter::Modifier)((token[0] & 0x0F000000) >> 24); + src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); + src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24); if((token[0] & 0x80000000) != 0x80000000) { - if(operation.opcode != Operation::OPCODE_DEF && - operation.opcode != Operation::OPCODE_DEFI && - operation.opcode != Operation::OPCODE_DEFB) + if(opcode != OPCODE_DEF && + opcode != OPCODE_DEFI && + opcode != OPCODE_DEFB) { ASSERT(false); } } - if(sourceParameter[i].relative && majorVersion >= 2) + if(relative && majorVersion >= 2) { - sourceParameter[i].relativeType = (Parameter::Type)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); - sourceParameter[i].relativeSwizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); + src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); + src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); } } } - std::string Shader::Instruction::swizzleString(Parameter::Type type, unsigned char swizzle) + std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle) { - if(type == Parameter::PARAMETER_VOID || type == Parameter::PARAMETER_LABEL || swizzle == 0xE4) + if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4) { return ""; } @@ -803,32 +725,230 @@ return swizzleString; } - std::string Shader::Instruction::Parameter::string(ShaderType shaderType, unsigned short version) const + std::string Shader::Instruction::operationString(unsigned short version) 
const { - std::ostringstream buffer; - - if(type == PARAMETER_FLOATLITERAL) + switch(opcode) { - buffer << value; - - return buffer.str(); + case OPCODE_NOP: return "nop"; + case OPCODE_MOV: return "mov"; + case OPCODE_ADD: return "add"; + case OPCODE_SUB: return "sub"; + case OPCODE_MAD: return "mad"; + case OPCODE_MUL: return "mul"; + case OPCODE_RCPX: return "rcpx"; + case OPCODE_DIV: return "div"; + case OPCODE_MOD: return "mod"; + case OPCODE_RSQX: return "rsqx"; + case OPCODE_SQRT: return "sqrt"; + case OPCODE_RSQ: return "rsq"; + case OPCODE_LEN2: return "len2"; + case OPCODE_LEN3: return "len3"; + case OPCODE_LEN4: return "len4"; + case OPCODE_DIST1: return "dist1"; + case OPCODE_DIST2: return "dist2"; + case OPCODE_DIST3: return "dist3"; + case OPCODE_DIST4: return "dist4"; + case OPCODE_DP3: return "dp3"; + case OPCODE_DP4: return "dp4"; + case OPCODE_MIN: return "min"; + case OPCODE_MAX: return "max"; + case OPCODE_SLT: return "slt"; + case OPCODE_SGE: return "sge"; + case OPCODE_EXP2X: return "exp2x"; + case OPCODE_LOG2X: return "log2x"; + case OPCODE_LIT: return "lit"; + case OPCODE_ATT: return "att"; + case OPCODE_LRP: return "lrp"; + case OPCODE_STEP: return "step"; + case OPCODE_SMOOTH: return "smooth"; + case OPCODE_FRC: return "frc"; + case OPCODE_M4X4: return "m4x4"; + case OPCODE_M4X3: return "m4x3"; + case OPCODE_M3X4: return "m3x4"; + case OPCODE_M3X3: return "m3x3"; + case OPCODE_M3X2: return "m3x2"; + case OPCODE_CALL: return "call"; + case OPCODE_CALLNZ: return "callnz"; + case OPCODE_LOOP: return "loop"; + case OPCODE_RET: return "ret"; + case OPCODE_ENDLOOP: return "endloop"; + case OPCODE_LABEL: return "label"; + case OPCODE_DCL: return "dcl"; + case OPCODE_POWX: return "powx"; + case OPCODE_CRS: return "crs"; + case OPCODE_SGN: return "sgn"; + case OPCODE_ABS: return "abs"; + case OPCODE_NRM2: return "nrm2"; + case OPCODE_NRM3: return "nrm3"; + case OPCODE_NRM4: return "nrm4"; + case OPCODE_SINCOS: return "sincos"; + case OPCODE_REP: 
return "rep"; + case OPCODE_ENDREP: return "endrep"; + case OPCODE_IF: return "if"; + case OPCODE_IFC: return "ifc"; + case OPCODE_ELSE: return "else"; + case OPCODE_ENDIF: return "endif"; + case OPCODE_BREAK: return "break"; + case OPCODE_BREAKC: return "breakc"; + case OPCODE_MOVA: return "mova"; + case OPCODE_DEFB: return "defb"; + case OPCODE_DEFI: return "defi"; + case OPCODE_TEXCOORD: return "texcoord"; + case OPCODE_TEXKILL: return "texkill"; + case OPCODE_DISCARD: return "discard"; + case OPCODE_TEX: + if(version < 0x0104) return "tex"; + else return "texld"; + case OPCODE_TEXBEM: return "texbem"; + case OPCODE_TEXBEML: return "texbeml"; + case OPCODE_TEXREG2AR: return "texreg2ar"; + case OPCODE_TEXREG2GB: return "texreg2gb"; + case OPCODE_TEXM3X2PAD: return "texm3x2pad"; + case OPCODE_TEXM3X2TEX: return "texm3x2tex"; + case OPCODE_TEXM3X3PAD: return "texm3x3pad"; + case OPCODE_TEXM3X3TEX: return "texm3x3tex"; + case OPCODE_RESERVED0: return "reserved0"; + case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; + case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; + case OPCODE_EXPP: return "expp"; + case OPCODE_LOGP: return "logp"; + case OPCODE_CND: return "cnd"; + case OPCODE_DEF: return "def"; + case OPCODE_TEXREG2RGB: return "texreg2rgb"; + case OPCODE_TEXDP3TEX: return "texdp3tex"; + case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; + case OPCODE_TEXDP3: return "texdp3"; + case OPCODE_TEXM3X3: return "texm3x3"; + case OPCODE_TEXDEPTH: return "texdepth"; + case OPCODE_CMP0: return "cmp0"; + case OPCODE_ICMP: return "icmp"; + case OPCODE_SELECT: return "select"; + case OPCODE_EXTRACT: return "extract"; + case OPCODE_INSERT: return "insert"; + case OPCODE_BEM: return "bem"; + case OPCODE_DP2ADD: return "dp2add"; + case OPCODE_DFDX: return "dFdx"; + case OPCODE_DFDY: return "dFdy"; + case OPCODE_FWIDTH: return "fwidth"; + case OPCODE_TEXLDD: return "texldd"; + case OPCODE_CMP: return "cmp"; + case OPCODE_TEXLDL: return "texldl"; + case OPCODE_BREAKP: return "breakp"; 
+ case OPCODE_PHASE: return "phase"; + case OPCODE_COMMENT: return "comment"; + case OPCODE_END: return "end"; + case OPCODE_PS_1_0: return "ps_1_0"; + case OPCODE_PS_1_1: return "ps_1_1"; + case OPCODE_PS_1_2: return "ps_1_2"; + case OPCODE_PS_1_3: return "ps_1_3"; + case OPCODE_PS_1_4: return "ps_1_4"; + case OPCODE_PS_2_0: return "ps_2_0"; + case OPCODE_PS_2_x: return "ps_2_x"; + case OPCODE_PS_3_0: return "ps_3_0"; + case OPCODE_VS_1_0: return "vs_1_0"; + case OPCODE_VS_1_1: return "vs_1_1"; + case OPCODE_VS_2_0: return "vs_2_0"; + case OPCODE_VS_2_x: return "vs_2_x"; + case OPCODE_VS_2_sw: return "vs_2_sw"; + case OPCODE_VS_3_0: return "vs_3_0"; + case OPCODE_VS_3_sw: return "vs_3_sw"; + case OPCODE_WHILE: return "while"; + case OPCODE_ENDWHILE: return "endwhile"; + case OPCODE_COS: return "cos"; + case OPCODE_SIN: return "sin"; + case OPCODE_TAN: return "tan"; + case OPCODE_ACOS: return "acos"; + case OPCODE_ASIN: return "asin"; + case OPCODE_ATAN: return "atan"; + case OPCODE_ATAN2: return "atan2"; + case OPCODE_DP1: return "dp1"; + case OPCODE_DP2: return "dp2"; + case OPCODE_TRUNC: return "trunc"; + case OPCODE_FLOOR: return "floor"; + case OPCODE_CEIL: return "ceil"; + case OPCODE_EXP2: return "exp2"; + case OPCODE_LOG2: return "log2"; + case OPCODE_EXP: return "exp"; + case OPCODE_LOG: return "log"; + case OPCODE_POW: return "pow"; + case OPCODE_F2B: return "f2b"; + case OPCODE_B2F: return "b2f"; + case OPCODE_ALL: return "all"; + case OPCODE_ANY: return "any"; + case OPCODE_NOT: return "not"; + case OPCODE_OR: return "or"; + case OPCODE_XOR: return "xor"; + case OPCODE_AND: return "and"; + case OPCODE_FORWARD1: return "forward1"; + case OPCODE_FORWARD2: return "forward2"; + case OPCODE_FORWARD3: return "forward3"; + case OPCODE_FORWARD4: return "forward4"; + case OPCODE_REFLECT1: return "reflect1"; + case OPCODE_REFLECT2: return "reflect2"; + case OPCODE_REFLECT3: return "reflect3"; + case OPCODE_REFLECT4: return "reflect4"; + case OPCODE_REFRACT1: 
return "refract1"; + case OPCODE_REFRACT2: return "refract2"; + case OPCODE_REFRACT3: return "refract3"; + case OPCODE_REFRACT4: return "refract4"; + case OPCODE_LEAVE: return "leave"; + case OPCODE_CONTINUE: return "continue"; + case OPCODE_TEST: return "test"; + default: + ASSERT(false); } - else - { - if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) - { - buffer << index; - return typeString(shaderType, version) + buffer.str(); - } - else - { - return typeString(shaderType, version); - } + return "<unknown>"; + } + + std::string Shader::Instruction::controlString() const + { + if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP) + { + if(project) return "p"; + + if(bias) return "b"; + + // FIXME: LOD + } + + switch(control) + { + case 1: return "_gt"; + case 2: return "_eq"; + case 3: return "_ge"; + case 4: return "_lt"; + case 5: return "_ne"; + case 6: return "_le"; + default: + return ""; + // ASSERT(false); // FIXME } } - std::string Shader::Instruction::Parameter::typeString(ShaderType shaderType, unsigned short version) const + std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const + { + std::ostringstream buffer; + + if(type == PARAMETER_FLOAT4LITERAL) + { + buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}'; + + return buffer.str(); + } + else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) + { + buffer << index; + + return typeString(shaderType, version) + buffer.str(); + } + else + { + return typeString(shaderType, version); + } + } + + std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const { switch(type) { @@ -865,9 +985,9 @@ else 
ASSERT(false); case PARAMETER_LABEL: return "l"; case PARAMETER_PREDICATE: return "p0"; - case PARAMETER_FLOATLITERAL: return ""; - case PARAMETER_BOOLLITERAL: return ""; - case PARAMETER_INTLITERAL: return ""; + case PARAMETER_FLOAT4LITERAL: return ""; + case PARAMETER_BOOL1LITERAL: return ""; + case PARAMETER_INT4LITERAL: return ""; // case PARAMETER_VOID: return ""; default: ASSERT(false); @@ -876,55 +996,83 @@ return ""; } - Shader::Shader(const unsigned long *shaderToken) + bool Shader::Instruction::isBranch() const { - instruction = 0; - length = 0; + return opcode == OPCODE_IF || opcode == OPCODE_IFC; + } + + bool Shader::Instruction::isCall() const + { + return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ; + } - tokenCount = 0; + bool Shader::Instruction::isBreak() const + { + return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP; + } - while(shaderToken[tokenCount] != 0x0000FFFF) - { - tokenCount += sw::Shader::size(shaderToken[tokenCount], (unsigned short)(shaderToken[0] & 0xFFFF)) + 1; - } + bool Shader::Instruction::isLoop() const + { + return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE; + } - tokenCount += 1; + bool Shader::Instruction::isEndLoop() const + { + return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE; + } - this->shaderToken = new unsigned long[tokenCount]; - memcpy(this->shaderToken, shaderToken, tokenCount * sizeof(unsigned long)); - - unsigned long *hashTokens = new unsigned long[tokenCount]; - memcpy(hashTokens, shaderToken, tokenCount * sizeof(unsigned long)); - removeComments(hashTokens, tokenCount); - hash = FNV_1((unsigned char*)hashTokens, tokenCount * sizeof(unsigned long)); - delete[] hashTokens; + Shader::Shader() : serialID(serialCounter++) + { + usedSamplers = 0; } Shader::~Shader() { - delete[] shaderToken; - shaderToken = 0; - - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { delete 
instruction[i]; instruction[i] = 0; } - - delete[] instruction; - instruction = 0; } - void Shader::getFunction(void *data, unsigned int *size) + void Shader::parse(const unsigned long *token) { - if(data) + minorVersion = (unsigned char)(token[0] & 0x000000FF); + majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); + shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); + + int length; + + if(shaderType == SHADER_VERTEX) { - memcpy(data, shaderToken, tokenCount * 4); + length = VertexShader::validate(token); } + else if(shaderType == SHADER_PIXEL) + { + length = PixelShader::validate(token); + } + else ASSERT(false); - *size = tokenCount * 4; + ASSERT(length != 0); + instruction.resize(length); + + for(int i = 0; i < length; i++) + { + while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token + { + int length = (*token & 0x7FFF0000) >> 16; + + token += length + 1; + } + + int tokenCount = size(*token); + + instruction[i] = new Instruction(token, tokenCount, majorVersion); + + token += 1 + tokenCount; + } } - + int Shader::size(unsigned long opcode) const { return size(opcode, version); @@ -1056,28 +1204,28 @@ int length = 0; - if((opcode & 0x0000FFFF) == ShaderOperation::OPCODE_COMMENT) + if((opcode & 0x0000FFFF) == OPCODE_COMMENT) { return (opcode & 0x7FFF0000) >> 16; } - if(opcode != ShaderOperation::OPCODE_PS_1_0 && - opcode != ShaderOperation::OPCODE_PS_1_1 && - opcode != ShaderOperation::OPCODE_PS_1_2 && - opcode != ShaderOperation::OPCODE_PS_1_3 && - opcode != ShaderOperation::OPCODE_PS_1_4 && - opcode != ShaderOperation::OPCODE_PS_2_0 && - opcode != ShaderOperation::OPCODE_PS_2_x && - opcode != ShaderOperation::OPCODE_PS_3_0 && - opcode != ShaderOperation::OPCODE_VS_1_0 && - opcode != ShaderOperation::OPCODE_VS_1_1 && - opcode != ShaderOperation::OPCODE_VS_2_0 && - opcode != ShaderOperation::OPCODE_VS_2_x && - opcode != ShaderOperation::OPCODE_VS_2_sw && - opcode != ShaderOperation::OPCODE_VS_3_0 && - opcode != ShaderOperation::OPCODE_VS_3_sw 
&& - opcode != ShaderOperation::OPCODE_PHASE && - opcode != ShaderOperation::OPCODE_END) + if(opcode != OPCODE_PS_1_0 && + opcode != OPCODE_PS_1_1 && + opcode != OPCODE_PS_1_2 && + opcode != OPCODE_PS_1_3 && + opcode != OPCODE_PS_1_4 && + opcode != OPCODE_PS_2_0 && + opcode != OPCODE_PS_2_x && + opcode != OPCODE_PS_3_0 && + opcode != OPCODE_VS_1_0 && + opcode != OPCODE_VS_1_1 && + opcode != OPCODE_VS_2_0 && + opcode != OPCODE_VS_2_x && + opcode != OPCODE_VS_2_sw && + opcode != OPCODE_VS_3_0 && + opcode != OPCODE_VS_3_sw && + opcode != OPCODE_PHASE && + opcode != OPCODE_END) { if(version >= 0x0200) { @@ -1098,10 +1246,10 @@ { switch(opcode & 0x0000FFFF) { - case ShaderOperation::OPCODE_TEX: + case OPCODE_TEX: length += 1; break; - case ShaderOperation::OPCODE_TEXCOORD: + case OPCODE_TEXCOORD: length += 1; break; default: @@ -1142,19 +1290,34 @@ return dynamicBranching; } - bool Shader::usesSampler(int index) const + bool Shader::containsBreakInstruction() const { - return (sampler & (1 << index)) != 0; + return containsBreak; } - int64_t Shader::getHash() const + bool Shader::containsContinueInstruction() const { - return hash; + return containsContinue; + } + + bool Shader::containsLeaveInstruction() const + { + return containsLeave; + } + + bool Shader::usesSampler(int index) const + { + return (usedSamplers & (1 << index)) != 0; + } + + int Shader::getSerialID() const + { + return serialID; } int Shader::getLength() const { - return length; + return instruction.size(); } Shader::ShaderType Shader::getShaderType() const @@ -1176,9 +1339,9 @@ vsnprintf(fullName, 1024, fileName, vararg); va_end(vararg); - std::ofstream file(fullName, std::ofstream::out | std::ofstream::app); + std::ofstream file(fullName, std::ofstream::out); - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { file << instruction[i]->string(shaderType, version) << std::endl; } @@ -1191,12 +1354,19 @@ file << instruction[index]->string(shaderType, version) << 
std::endl; } - const ShaderInstruction *Shader::getInstruction(int i) const + void Shader::append(Instruction *instruction) { - if(i < 0 || i >= length) - { - ASSERT(false); - } + this->instruction.push_back(instruction); + } + + void Shader::declareSampler(int i) + { + usedSamplers |= 1 << i; + } + + const Shader::Instruction *Shader::getInstruction(unsigned int i) const + { + ASSERT(i < instruction.size()); return instruction[i]; } @@ -1207,26 +1377,26 @@ dirtyConstantsI = 0; dirtyConstantsB = 0; - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - switch(instruction[i]->operation.opcode) + switch(instruction[i]->opcode) { - case ShaderOperation::OPCODE_DEF: - if(instruction[i]->destinationParameter.index + 1 > dirtyConstantsF) + case OPCODE_DEF: + if(instruction[i]->dst.index + 1 > dirtyConstantsF) { - dirtyConstantsF = instruction[i]->destinationParameter.index + 1; + dirtyConstantsF = instruction[i]->dst.index + 1; } break; - case ShaderOperation::OPCODE_DEFI: - if(instruction[i]->destinationParameter.index + 1 > dirtyConstantsI) + case OPCODE_DEFI: + if(instruction[i]->dst.index + 1 > dirtyConstantsI) { - dirtyConstantsI = instruction[i]->destinationParameter.index + 1; + dirtyConstantsI = instruction[i]->dst.index + 1; } break; - case ShaderOperation::OPCODE_DEFB: - if(instruction[i]->destinationParameter.index + 1 > dirtyConstantsB) + case OPCODE_DEFB: + if(instruction[i]->dst.index + 1 > dirtyConstantsB) { - dirtyConstantsB = instruction[i]->destinationParameter.index + 1; + dirtyConstantsB = instruction[i]->dst.index + 1; } break; } @@ -1236,61 +1406,205 @@ void Shader::analyzeDynamicBranching() { dynamicBranching = false; + containsLeave = false; + containsBreak = false; + containsContinue = false; - for(int i = 0; i < length; i++) + // Determine global presence of branching instructions + for(unsigned int i = 0; i < instruction.size(); i++) { - switch(instruction[i]->getOpcode()) + switch(instruction[i]->opcode) { 
- case ShaderOperation::OPCODE_CALLNZ: - case ShaderOperation::OPCODE_IF: - case ShaderOperation::OPCODE_IFC: - case ShaderOperation::OPCODE_BREAK: - case ShaderOperation::OPCODE_BREAKC: - case ShaderOperation::OPCODE_SETP: - case ShaderOperation::OPCODE_BREAKP: - if(instruction[i]->sourceParameter[0].type != ShaderParameter::PARAMETER_CONSTBOOL) + case OPCODE_CALLNZ: + case OPCODE_IF: + case OPCODE_IFC: + case OPCODE_BREAK: + case OPCODE_BREAKC: + case OPCODE_CMP: + case OPCODE_BREAKP: + case OPCODE_LEAVE: + case OPCODE_CONTINUE: + if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL) { dynamicBranching = true; + } + + if(instruction[i]->opcode == OPCODE_LEAVE) + { + containsLeave = true; + } + + if(instruction[i]->isBreak()) + { + containsBreak = true; + } + + if(instruction[i]->opcode == OPCODE_CONTINUE) + { + containsContinue = true; + } + } + } + + // Conservatively determine which instructions are affected by dynamic branching + int branchDepth = 0; + int breakDepth = 0; + int continueDepth = 0; + bool leaveReturn = false; + + for(unsigned int i = 0; i < instruction.size(); i++) + { + // If statements + if(instruction[i]->isBranch()) + { + branchDepth++; + } + else if(instruction[i]->opcode == OPCODE_ENDIF) + { + branchDepth--; + } + + if(branchDepth > 0) + { + instruction[i]->analysisBranch = true; + + if(instruction[i]->isCall()) + { + markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); + } + } + + // Break statement + if(instruction[i]->isBreak()) + { + breakDepth++; + } + else if(instruction[i]->isEndLoop()) + { + breakDepth--; + } + + if(breakDepth > 0) + { + if(instruction[i]->isLoop()) // Nested loop, don't make the end of it disable the break execution mask + { + breakDepth++; + } + + instruction[i]->analysisBreak = true; + + if(instruction[i]->isCall()) + { + markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); + } + } + + // Continue statement + if(instruction[i]->opcode == OPCODE_CONTINUE) + { + continueDepth++; + } + 
else if(instruction[i]->isEndLoop()) + { + continueDepth--; + } + + if(continueDepth > 0) + { + if(instruction[i]->isLoop()) // Nested loop, don't make the end of it disable the continue execution mask + { + continueDepth++; + } + + instruction[i]->analysisContinue = true; + + if(instruction[i]->isCall()) + { + markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE); + } + } + + // Return (leave) statement + if(instruction[i]->opcode == OPCODE_LEAVE) + { + leaveReturn = true; + } + else if(instruction[i]->opcode == OPCODE_RET) // End of the function + { + leaveReturn = false; + } + + if(leaveReturn) + { + instruction[i]->analysisLeave = true; + + if(instruction[i]->isCall()) + { + markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE); + } + } + } + } + + void Shader::markFunctionAnalysis(int functionLabel, Analysis flag) + { + bool marker = false; + for(unsigned int i = 0; i < instruction.size(); i++) + { + if(!marker) + { + if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel) + { + marker = true; + } + } + else + { + if(instruction[i]->opcode == OPCODE_RET) + { break; } + else if(instruction[i]->isCall()) + { + markFunctionAnalysis(instruction[i]->dst.label, flag); + } + + instruction[i]->analysis |= flag; } } } void Shader::analyzeSamplers() { - sampler = 0; - - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - switch(instruction[i]->getOpcode()) + switch(instruction[i]->opcode) { - case ShaderOperation::OPCODE_TEX: - case ShaderOperation::OPCODE_TEXBEM: - case ShaderOperation::OPCODE_TEXBEML: - case ShaderOperation::OPCODE_TEXREG2AR: - case ShaderOperation::OPCODE_TEXREG2GB: - case ShaderOperation::OPCODE_TEXM3X2TEX: - case ShaderOperation::OPCODE_TEXM3X3TEX: - case ShaderOperation::OPCODE_TEXM3X3SPEC: - case ShaderOperation::OPCODE_TEXM3X3VSPEC: - case ShaderOperation::OPCODE_TEXREG2RGB: - case ShaderOperation::OPCODE_TEXDP3TEX: - case 
ShaderOperation::OPCODE_TEXM3X2DEPTH: - case ShaderOperation::OPCODE_TEXLDD: - case ShaderOperation::OPCODE_TEXLDL: + case OPCODE_TEX: + case OPCODE_TEXBEM: + case OPCODE_TEXBEML: + case OPCODE_TEXREG2AR: + case OPCODE_TEXREG2GB: + case OPCODE_TEXM3X2TEX: + case OPCODE_TEXM3X3TEX: + case OPCODE_TEXM3X3SPEC: + case OPCODE_TEXM3X3VSPEC: + case OPCODE_TEXREG2RGB: + case OPCODE_TEXDP3TEX: + case OPCODE_TEXM3X2DEPTH: + case OPCODE_TEXLDD: + case OPCODE_TEXLDL: { - ShaderParameter &dst = instruction[i]->destinationParameter; - ShaderParameter &src1 = instruction[i]->sourceParameter[1]; + Parameter &dst = instruction[i]->dst; + Parameter &src1 = instruction[i]->src[1]; if(majorVersion >= 2) { - ASSERT(src1.type == ShaderParameter::PARAMETER_SAMPLER); - sampler |= 1 << src1.index; + usedSamplers |= 1 << src1.index; } else { - sampler |= 1 << dst.index; + usedSamplers |= 1 << dst.index; } } break; @@ -1298,21 +1612,57 @@ } } - void Shader::removeComments(unsigned long *shaderToken, int tokenCount) + // Assigns a unique index to each call instruction, on a per label basis. + // This is used to know what basic block to return to. 
+ void Shader::analyzeCallSites() { - for(int i = 0; i < tokenCount; ) - { - int instructionSize = sw::Shader::size(shaderToken[i], (unsigned short)(shaderToken[0] & 0xFFFF)) + 1; + int callSiteIndex[2048] = {0}; - if((shaderToken[i] & 0x0000FFFF) == ShaderOperation::OPCODE_COMMENT) + for(unsigned int i = 0; i < instruction.size(); i++) + { + if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ) { - for(int j = 0; j < instructionSize; j++) + int label = instruction[i]->dst.label; + + instruction[i]->dst.callSite = callSiteIndex[label]++; + } + } + } + + void Shader::analyzeDynamicIndexing() + { + dynamicallyIndexedTemporaries = false; + dynamicallyIndexedInput = false; + dynamicallyIndexedOutput = false; + + for(unsigned int i = 0; i < instruction.size(); i++) + { + if(instruction[i]->dst.rel.type == PARAMETER_ADDR || + instruction[i]->dst.rel.type == PARAMETER_LOOP || + instruction[i]->dst.rel.type == PARAMETER_TEMP) + { + switch(instruction[i]->dst.type) { - shaderToken[i + j] = ShaderOperation::OPCODE_NOP; + case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; + case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; + case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; } } - i += instructionSize; + for(int j = 0; j < 3; j++) + { + if(instruction[i]->src[j].rel.type == PARAMETER_ADDR || + instruction[i]->src[j].rel.type == PARAMETER_LOOP || + instruction[i]->src[j].rel.type == PARAMETER_TEMP) + { + switch(instruction[i]->src[j].type) + { + case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; + case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; + case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; + } + } + } } } }
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp index d2d6fe3..b08b56e 100644 --- a/src/Shader/Shader.hpp +++ b/src/Shader/Shader.hpp
@@ -1,459 +1,572 @@ -// SwiftShader Software Renderer -// -// Copyright(c) 2005-2011 TransGaming Inc. -// -// All rights reserved. No part of this software may be copied, distributed, transmitted, -// transcribed, stored in a retrieval system, translated into any human or computer -// language by any means, or disclosed to third parties without the explicit written -// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express -// or implied, including but not limited to any patent rights, are granted to you. -// - -#ifndef sw_Shader_hpp -#define sw_Shader_hpp - -#include "Common/Types.hpp" - -#include <string> - -namespace sw -{ - class Shader - { - public: - enum ShaderType - { - SHADER_PIXEL = 0xFFFF, - SHADER_VERTEX = 0xFFFE, - SHADER_GEOMETRY = 0xFFFD - }; - - class Instruction - { - friend Shader; - - public: - Instruction(); - Instruction(const unsigned long *token, int size, unsigned char majorVersion); - - virtual ~Instruction(); - - struct Operation - { - enum Opcode - { - // Extracted from d3d9types.h - OPCODE_NOP = 0, - OPCODE_MOV, - OPCODE_ADD, - OPCODE_SUB, - OPCODE_MAD, - OPCODE_MUL, - OPCODE_RCP, - OPCODE_RSQ, - OPCODE_DP3, - OPCODE_DP4, - OPCODE_MIN, - OPCODE_MAX, - OPCODE_SLT, - OPCODE_SGE, - OPCODE_EXP, - OPCODE_LOG, - OPCODE_LIT, - OPCODE_DST, - OPCODE_LRP, - OPCODE_FRC, - OPCODE_M4X4, - OPCODE_M4X3, - OPCODE_M3X4, - OPCODE_M3X3, - OPCODE_M3X2, - OPCODE_CALL, - OPCODE_CALLNZ, - OPCODE_LOOP, - OPCODE_RET, - OPCODE_ENDLOOP, - OPCODE_LABEL, - OPCODE_DCL, - OPCODE_POW, - OPCODE_CRS, - OPCODE_SGN, - OPCODE_ABS, - OPCODE_NRM, - OPCODE_SINCOS, - OPCODE_REP, - OPCODE_ENDREP, - OPCODE_IF, - OPCODE_IFC, - OPCODE_ELSE, - OPCODE_ENDIF, - OPCODE_BREAK, - OPCODE_BREAKC, - OPCODE_MOVA, - OPCODE_DEFB, - OPCODE_DEFI, - - OPCODE_TEXCOORD = 64, - OPCODE_TEXKILL, - OPCODE_TEX, - OPCODE_TEXBEM, - OPCODE_TEXBEML, - OPCODE_TEXREG2AR, - OPCODE_TEXREG2GB, - OPCODE_TEXM3X2PAD, - OPCODE_TEXM3X2TEX, - OPCODE_TEXM3X3PAD, - OPCODE_TEXM3X3TEX, - 
OPCODE_RESERVED0, - OPCODE_TEXM3X3SPEC, - OPCODE_TEXM3X3VSPEC, - OPCODE_EXPP, - OPCODE_LOGP, - OPCODE_CND, - OPCODE_DEF, - OPCODE_TEXREG2RGB, - OPCODE_TEXDP3TEX, - OPCODE_TEXM3X2DEPTH, - OPCODE_TEXDP3, - OPCODE_TEXM3X3, - OPCODE_TEXDEPTH, - OPCODE_CMP, - OPCODE_BEM, - OPCODE_DP2ADD, - OPCODE_DSX, - OPCODE_DSY, - OPCODE_TEXLDD, - OPCODE_SETP, - OPCODE_TEXLDL, - OPCODE_BREAKP, - - OPCODE_PHASE = 0xFFFD, - OPCODE_COMMENT = 0xFFFE, - OPCODE_END = 0xFFFF, - - OPCODE_PS_1_0 = 0xFFFF0100, - OPCODE_PS_1_1 = 0xFFFF0101, - OPCODE_PS_1_2 = 0xFFFF0102, - OPCODE_PS_1_3 = 0xFFFF0103, - OPCODE_PS_1_4 = 0xFFFF0104, - OPCODE_PS_2_0 = 0xFFFF0200, - OPCODE_PS_2_x = 0xFFFF0201, - OPCODE_PS_3_0 = 0xFFFF0300, - - OPCODE_VS_1_0 = 0xFFFE0100, - OPCODE_VS_1_1 = 0xFFFE0101, - OPCODE_VS_2_0 = 0xFFFE0200, - OPCODE_VS_2_x = 0xFFFE0201, - OPCODE_VS_2_sw = 0xFFFE02FF, - OPCODE_VS_3_0 = 0xFFFE0300, - OPCODE_VS_3_sw = 0xFFFE03FF, - }; - - enum Control - { - CONTROL_RESERVED0, - CONTROL_GT, - CONTROL_EQ, - CONTROL_GE, - CONTROL_LT, - CONTROL_NE, - CONTROL_LE, - CONTROL_RESERVED1 - }; - - enum SamplerType - { - SAMPLER_UNKNOWN, - SAMPLER_1D, - SAMPLER_2D, - SAMPLER_CUBE, - SAMPLER_VOLUME - }; - - enum Usage // For vertex input/output declarations - { - USAGE_POSITION = 0, - USAGE_BLENDWEIGHT = 1, - USAGE_BLENDINDICES = 2, - USAGE_NORMAL = 3, - USAGE_PSIZE = 4, - USAGE_TEXCOORD = 5, - USAGE_TANGENT = 6, - USAGE_BINORMAL = 7, - USAGE_TESSFACTOR = 8, - USAGE_POSITIONT = 9, - USAGE_COLOR = 10, - USAGE_FOG = 11, - USAGE_DEPTH = 12, - USAGE_SAMPLE = 13 - }; - - Operation() : opcode(OPCODE_NOP), control(CONTROL_RESERVED0), predicate(false), predicateNot(false), predicateSwizzle(0xE4), coissue(false), samplerType(SAMPLER_UNKNOWN), usage(USAGE_POSITION), usageIndex(0) - { - } - - std::string string(unsigned short version) const; - std::string controlString() const; - - Opcode opcode; - - union - { - Control control; - - struct - { - unsigned char project : 1; - unsigned char bias : 1; - }; - }; - - bool 
predicate; - bool predicateNot; // Negative predicate - unsigned char predicateSwizzle; - - bool coissue; - SamplerType samplerType; - Usage usage; - unsigned char usageIndex; - }; - - struct Parameter - { - enum Type - { - PARAMETER_TEMP = 0, - PARAMETER_INPUT = 1, - PARAMETER_CONST = 2, - PARAMETER_TEXTURE = 3, - PARAMETER_ADDR = 3, - PARAMETER_RASTOUT = 4, - PARAMETER_ATTROUT = 5, - PARAMETER_TEXCRDOUT = 6, - PARAMETER_OUTPUT = 6, - PARAMETER_CONSTINT = 7, - PARAMETER_COLOROUT = 8, - PARAMETER_DEPTHOUT = 9, - PARAMETER_SAMPLER = 10, - PARAMETER_CONST2 = 11, - PARAMETER_CONST3 = 12, - PARAMETER_CONST4 = 13, - PARAMETER_CONSTBOOL = 14, - PARAMETER_LOOP = 15, - PARAMETER_TEMPFLOAT16 = 16, - PARAMETER_MISCTYPE = 17, - PARAMETER_LABEL = 18, - PARAMETER_PREDICATE = 19, - - // Internally used - PARAMETER_FLOATLITERAL = 20, - PARAMETER_BOOLLITERAL = 21, - PARAMETER_INTLITERAL = 22, - - PARAMETER_VOID - }; - - union - { - unsigned int index; // For registers - float value; // For float constants - int integer; // For integer constants - bool boolean; // For boolean constants - }; - - Parameter() : type(PARAMETER_VOID), index(0), relative(false), relativeType(PARAMETER_VOID), relativeSwizzle(0xE4) - { - } - - std::string string(ShaderType shaderType, unsigned short version) const; - std::string typeString(ShaderType shaderType, unsigned short version) const; - std::string relativeString() const; - - Type type; - bool relative; - Type relativeType; - unsigned char relativeSwizzle; - }; - - struct DestinationParameter : Parameter - { - union - { - unsigned char mask; - - struct - { - bool x : 1; - bool y : 1; - bool z : 1; - bool w : 1; - }; - }; - - DestinationParameter() : mask(0xF), saturate(false), partialPrecision(false), centroid(false), shift(0) - { - } - - std::string modifierString() const; - std::string shiftString() const; - std::string maskString() const; - - bool saturate; - bool partialPrecision; - bool centroid; - signed char shift; - }; - - struct 
SourceParameter : Parameter - { - enum Modifier - { - MODIFIER_NONE, - MODIFIER_NEGATE, - MODIFIER_BIAS, - MODIFIER_BIAS_NEGATE, - MODIFIER_SIGN, - MODIFIER_SIGN_NEGATE, - MODIFIER_COMPLEMENT, - MODIFIER_X2, - MODIFIER_X2_NEGATE, - MODIFIER_DZ, - MODIFIER_DW, - MODIFIER_ABS, - MODIFIER_ABS_NEGATE, - MODIFIER_NOT - }; - - SourceParameter() : swizzle(0xE4), modifier(MODIFIER_NONE) - { - } - - std::string swizzleString() const; - std::string preModifierString() const; - std::string postModifierString() const; - - unsigned char swizzle; - Modifier modifier; - }; - - void parseOperationToken(unsigned long token, unsigned char majorVersion); - void parseDeclarationToken(unsigned long token); - void parseDestinationToken(const unsigned long *token, unsigned char majorVersion); - void parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion); - - Operation::Opcode getOpcode() const; - const DestinationParameter &getDestinationParameter() const; - const SourceParameter &getSourceParameter(int i) const; - - bool isCoissue() const; - bool isProject() const; - bool isBias() const; - bool isPredicate() const; - bool isPredicateNot() const; - unsigned char getPredicateSwizzle() const; - Operation::Control getControl() const; - Operation::Usage getUsage() const; - unsigned char getUsageIndex() const; - Operation::SamplerType getSamplerType() const; - - std::string string(ShaderType shaderType, unsigned short version) const; - - protected: - Operation operation; - DestinationParameter destinationParameter; - SourceParameter sourceParameter[4]; - - private: - static std::string swizzleString(Parameter::Type type, unsigned char swizzle); - }; - - Shader(const unsigned long *shaderToken); - - ~Shader(); - - void getFunction(void *data, unsigned int *size); - - int64_t getHash() const; - int getLength() const; - ShaderType getShaderType() const; - unsigned short getVersion() const; - - const Instruction *getInstruction(int i) const; - int size(unsigned long 
opcode) const; - static int size(unsigned long opcode, unsigned short version); - - void print(const char *fileName, ...) const; - void printInstruction(int index, const char *fileName) const; - - static bool maskContainsComponent(int mask, int component); - static bool swizzleContainsComponent(int swizzle, int component); - static bool swizzleContainsComponentMasked(int swizzle, int component, int mask); - - bool containsDynamicBranching() const; - bool usesSampler(int i) const; - - struct Semantic - { - Semantic(unsigned char usage = 0xFF, unsigned char index = 0xFF) : usage(usage), index(index), centroid(false) - { - } - - bool operator==(const Semantic &semantic) const - { - return usage == semantic.usage && index == semantic.index; - } - - bool active() const - { - return usage != 0xFF; - } - - unsigned char usage; - unsigned char index; - bool centroid; - }; - - unsigned int dirtyConstantsF; // FIXME: Private - unsigned int dirtyConstantsI; // FIXME: Private - unsigned int dirtyConstantsB; // FIXME: Private - - protected: - void analyzeDirtyConstants(); - void analyzeDynamicBranching(); - void analyzeSamplers(); - - ShaderType shaderType; - - union - { - unsigned short version; - - struct - { - unsigned char minorVersion; - unsigned char majorVersion; - }; - }; - - int length; - Instruction **instruction; - - private: - static void removeComments(unsigned long *shaderToken, int tokenCount); - - int64_t hash; - - bool dynamicBranching; - unsigned short sampler; - - unsigned long *shaderToken; - int tokenCount; - }; - - typedef Shader::Instruction::Operation ShaderOperation; - typedef Shader::Instruction ShaderInstruction; - typedef Shader::Instruction::Parameter ShaderParameter; - typedef Shader::Instruction::Operation::Opcode ShaderOpcode; -} - -#endif // sw_Shader_hpp +// SwiftShader Software Renderer +// +// Copyright(c) 2005-2012 TransGaming Inc. +// +// All rights reserved. 
No part of this software may be copied, distributed, transmitted, +// transcribed, stored in a retrieval system, translated into any human or computer +// language by any means, or disclosed to third parties without the explicit written +// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express +// or implied, including but not limited to any patent rights, are granted to you. +// + +#ifndef sw_Shader_hpp +#define sw_Shader_hpp + +#include "Common/Types.hpp" + +#include <string> +#include <vector> + +namespace sw +{ + class Shader + { + public: + enum ShaderType + { + SHADER_PIXEL = 0xFFFF, + SHADER_VERTEX = 0xFFFE, + SHADER_GEOMETRY = 0xFFFD + }; + + enum Opcode + { + // Matches order in d3d9types.h + OPCODE_NOP = 0, + OPCODE_MOV, + OPCODE_ADD, + OPCODE_SUB, + OPCODE_MAD, + OPCODE_MUL, + OPCODE_RCPX, + OPCODE_RSQX, + OPCODE_DP3, + OPCODE_DP4, + OPCODE_MIN, + OPCODE_MAX, + OPCODE_SLT, + OPCODE_SGE, + OPCODE_EXP2X, // D3DSIO_EXP + OPCODE_LOG2X, // D3DSIO_LOG + OPCODE_LIT, + OPCODE_ATT, // D3DSIO_DST + OPCODE_LRP, + OPCODE_FRC, + OPCODE_M4X4, + OPCODE_M4X3, + OPCODE_M3X4, + OPCODE_M3X3, + OPCODE_M3X2, + OPCODE_CALL, + OPCODE_CALLNZ, + OPCODE_LOOP, + OPCODE_RET, + OPCODE_ENDLOOP, + OPCODE_LABEL, + OPCODE_DCL, + OPCODE_POWX, + OPCODE_CRS, + OPCODE_SGN, + OPCODE_ABS, + OPCODE_NRM3, // D3DSIO_NRM + OPCODE_SINCOS, + OPCODE_REP, + OPCODE_ENDREP, + OPCODE_IF, + OPCODE_IFC, + OPCODE_ELSE, + OPCODE_ENDIF, + OPCODE_BREAK, + OPCODE_BREAKC, + OPCODE_MOVA, + OPCODE_DEFB, + OPCODE_DEFI, + + OPCODE_TEXCOORD = 64, + OPCODE_TEXKILL, + OPCODE_TEX, + OPCODE_TEXBEM, + OPCODE_TEXBEML, + OPCODE_TEXREG2AR, + OPCODE_TEXREG2GB, + OPCODE_TEXM3X2PAD, + OPCODE_TEXM3X2TEX, + OPCODE_TEXM3X3PAD, + OPCODE_TEXM3X3TEX, + OPCODE_RESERVED0, + OPCODE_TEXM3X3SPEC, + OPCODE_TEXM3X3VSPEC, + OPCODE_EXPP, + OPCODE_LOGP, + OPCODE_CND, + OPCODE_DEF, + OPCODE_TEXREG2RGB, + OPCODE_TEXDP3TEX, + OPCODE_TEXM3X2DEPTH, + OPCODE_TEXDP3, + OPCODE_TEXM3X3, + OPCODE_TEXDEPTH, + 
OPCODE_CMP0, // D3DSIO_CMP + OPCODE_BEM, + OPCODE_DP2ADD, + OPCODE_DFDX, // D3DSIO_DSX + OPCODE_DFDY, // D3DSIO_DSY + OPCODE_TEXLDD, + OPCODE_CMP, // D3DSIO_SETP + OPCODE_TEXLDL, + OPCODE_BREAKP, + + OPCODE_PHASE = 0xFFFD, + OPCODE_COMMENT = 0xFFFE, + OPCODE_END = 0xFFFF, + + OPCODE_PS_1_0 = 0xFFFF0100, + OPCODE_PS_1_1 = 0xFFFF0101, + OPCODE_PS_1_2 = 0xFFFF0102, + OPCODE_PS_1_3 = 0xFFFF0103, + OPCODE_PS_1_4 = 0xFFFF0104, + OPCODE_PS_2_0 = 0xFFFF0200, + OPCODE_PS_2_x = 0xFFFF0201, + OPCODE_PS_3_0 = 0xFFFF0300, + + OPCODE_VS_1_0 = 0xFFFE0100, + OPCODE_VS_1_1 = 0xFFFE0101, + OPCODE_VS_2_0 = 0xFFFE0200, + OPCODE_VS_2_x = 0xFFFE0201, + OPCODE_VS_2_sw = 0xFFFE02FF, + OPCODE_VS_3_0 = 0xFFFE0300, + OPCODE_VS_3_sw = 0xFFFE03FF, + + OPCODE_WHILE = 0x80000001, + OPCODE_ENDWHILE, + OPCODE_COS, + OPCODE_SIN, + OPCODE_TAN, + OPCODE_ACOS, + OPCODE_ASIN, + OPCODE_ATAN, + OPCODE_ATAN2, + OPCODE_DP1, + OPCODE_DP2, + OPCODE_TRUNC, + OPCODE_FLOOR, + OPCODE_CEIL, + OPCODE_SQRT, + OPCODE_RSQ, + OPCODE_LEN2, + OPCODE_LEN3, + OPCODE_LEN4, + OPCODE_DIST1, + OPCODE_DIST2, + OPCODE_DIST3, + OPCODE_DIST4, + OPCODE_NRM2, + OPCODE_NRM4, + OPCODE_DIV, + OPCODE_MOD, + OPCODE_EXP2, + OPCODE_LOG2, + OPCODE_EXP, + OPCODE_LOG, + OPCODE_POW, + OPCODE_F2B, // Float to bool + OPCODE_B2F, // Bool to float + OPCODE_ALL, + OPCODE_ANY, + OPCODE_NOT, + OPCODE_OR, + OPCODE_XOR, + OPCODE_AND, + OPCODE_STEP, + OPCODE_SMOOTH, + OPCODE_FORWARD1, + OPCODE_FORWARD2, + OPCODE_FORWARD3, + OPCODE_FORWARD4, + OPCODE_REFLECT1, + OPCODE_REFLECT2, + OPCODE_REFLECT3, + OPCODE_REFLECT4, + OPCODE_REFRACT1, + OPCODE_REFRACT2, + OPCODE_REFRACT3, + OPCODE_REFRACT4, + OPCODE_ICMP, + OPCODE_SELECT, + OPCODE_EXTRACT, + OPCODE_INSERT, + OPCODE_DISCARD, + OPCODE_FWIDTH, + OPCODE_LEAVE, // Return before the end of the function + OPCODE_CONTINUE, + OPCODE_TEST, // Marks the end of the code that can be skipped by 'continue' + }; + + static Opcode OPCODE_DP(int); + static Opcode OPCODE_LEN(int); + static Opcode OPCODE_DIST(int); + 
static Opcode OPCODE_NRM(int); + static Opcode OPCODE_FORWARD(int); + static Opcode OPCODE_REFLECT(int); + static Opcode OPCODE_REFRACT(int); + + enum Control + { + CONTROL_RESERVED0, + CONTROL_GT, + CONTROL_EQ, + CONTROL_GE, + CONTROL_LT, + CONTROL_NE, + CONTROL_LE, + CONTROL_RESERVED1 + }; + + enum SamplerType + { + SAMPLER_UNKNOWN, + SAMPLER_1D, + SAMPLER_2D, + SAMPLER_CUBE, + SAMPLER_VOLUME + }; + + enum Usage // For vertex input/output declarations + { + USAGE_POSITION = 0, + USAGE_BLENDWEIGHT = 1, + USAGE_BLENDINDICES = 2, + USAGE_NORMAL = 3, + USAGE_PSIZE = 4, + USAGE_TEXCOORD = 5, + USAGE_TANGENT = 6, + USAGE_BINORMAL = 7, + USAGE_TESSFACTOR = 8, + USAGE_POSITIONT = 9, + USAGE_COLOR = 10, + USAGE_FOG = 11, + USAGE_DEPTH = 12, + USAGE_SAMPLE = 13 + }; + + enum ParameterType + { + PARAMETER_TEMP = 0, + PARAMETER_INPUT = 1, + PARAMETER_CONST = 2, + PARAMETER_TEXTURE = 3, + PARAMETER_ADDR = 3, + PARAMETER_RASTOUT = 4, + PARAMETER_ATTROUT = 5, + PARAMETER_TEXCRDOUT = 6, + PARAMETER_OUTPUT = 6, + PARAMETER_CONSTINT = 7, + PARAMETER_COLOROUT = 8, + PARAMETER_DEPTHOUT = 9, + PARAMETER_SAMPLER = 10, + PARAMETER_CONST2 = 11, + PARAMETER_CONST3 = 12, + PARAMETER_CONST4 = 13, + PARAMETER_CONSTBOOL = 14, + PARAMETER_LOOP = 15, + PARAMETER_TEMPFLOAT16 = 16, + PARAMETER_MISCTYPE = 17, + PARAMETER_LABEL = 18, + PARAMETER_PREDICATE = 19, + + // PARAMETER_FLOAT1LITERAL, + // PARAMETER_FLOAT2LITERAL, + // PARAMETER_FLOAT3LITERAL, + PARAMETER_FLOAT4LITERAL, + PARAMETER_BOOL1LITERAL, + // PARAMETER_BOOL2LITERAL, + // PARAMETER_BOOL3LITERAL, + // PARAMETER_BOOL4LITERAL, + // PARAMETER_INT1LITERAL, + // PARAMETER_INT2LITERAL, + // PARAMETER_INT3LITERAL, + PARAMETER_INT4LITERAL, + + PARAMETER_VOID + }; + + enum Modifier + { + MODIFIER_NONE, + MODIFIER_NEGATE, + MODIFIER_BIAS, + MODIFIER_BIAS_NEGATE, + MODIFIER_SIGN, + MODIFIER_SIGN_NEGATE, + MODIFIER_COMPLEMENT, + MODIFIER_X2, + MODIFIER_X2_NEGATE, + MODIFIER_DZ, + MODIFIER_DW, + MODIFIER_ABS, + MODIFIER_ABS_NEGATE, + MODIFIER_NOT 
+ }; + + enum Analysis + { + // Flags indicating whether an instruction is affected by an execution enable mask + ANALYSIS_BRANCH = 0x00000001, + ANALYSIS_BREAK = 0x00000002, + ANALYSIS_CONTINUE = 0x00000004, + ANALYSIS_LEAVE = 0x00000008, + }; + + struct RelativeAddress + { + RelativeAddress() : type(PARAMETER_VOID), index(0), swizzle(0), scale(1), deterministic(false) + { + } + + ParameterType type : 8; + unsigned int index; + unsigned int swizzle : 8; + unsigned int scale; + bool deterministic; // Equal across shader instances run in lockstep (e.g. unrollable loop counters) + }; + + struct Parameter + { + union + { + struct + { + unsigned int index; // For register types + + RelativeAddress rel; + }; + + float value[4]; // For float constants + int integer[4]; // For integer constants + int boolean[4]; // For boolean constants + + struct + { + unsigned int label; // Label index + unsigned int callSite; // Call index (per label) + }; + }; + + Parameter() : type(PARAMETER_VOID), index(0) + { + } + + std::string string(ShaderType shaderType, unsigned short version) const; + std::string typeString(ShaderType shaderType, unsigned short version) const; + std::string relativeString() const; + + ParameterType type : 8; + }; + + struct DestinationParameter : Parameter + { + union + { + unsigned char mask; + + struct + { + bool x : 1; + bool y : 1; + bool z : 1; + bool w : 1; + }; + }; + + DestinationParameter() : mask(0xF), integer(false), saturate(false), partialPrecision(false), centroid(false), shift(0) + { + } + + std::string modifierString() const; + std::string shiftString() const; + std::string maskString() const; + + bool integer : 1; + bool saturate : 1; + bool partialPrecision : 1; + bool centroid : 1; + signed char shift : 4; + }; + + struct SourceParameter : Parameter + { + SourceParameter() : swizzle(0xE4), modifier(MODIFIER_NONE) + { + } + + std::string swizzleString() const; + std::string preModifierString() const; + std::string postModifierString()
const; + + unsigned int swizzle : 8; + Modifier modifier : 8; + }; + + struct Instruction + { + explicit Instruction(Opcode opcode); + Instruction(const unsigned long *token, int size, unsigned char majorVersion); + + virtual ~Instruction(); + + void parseOperationToken(unsigned long token, unsigned char majorVersion); + void parseDeclarationToken(unsigned long token); + void parseDestinationToken(const unsigned long *token, unsigned char majorVersion); + void parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion); + + std::string string(ShaderType shaderType, unsigned short version) const; + static std::string swizzleString(ParameterType type, unsigned char swizzle); + std::string operationString(unsigned short version) const; + std::string controlString() const; + + bool isBranch() const; + bool isCall() const; + bool isBreak() const; + bool isLoop() const; + bool isEndLoop() const; + + Opcode opcode; + + union + { + Control control; + + struct + { + unsigned char project : 1; + unsigned char bias : 1; + }; + }; + + bool predicate; + bool predicateNot; // Negative predicate + unsigned char predicateSwizzle; + + bool coissue; + SamplerType samplerType; + Usage usage; + unsigned char usageIndex; + + DestinationParameter dst; + SourceParameter src[4]; + + union + { + unsigned int analysis; + + struct + { + // Keep in sync with Shader::Analysis flags + unsigned int analysisBranch : 1; + unsigned int analysisBreak : 1; + unsigned int analysisContinue : 1; + unsigned int analysisLeave : 1; + }; + }; + }; + + Shader(); + + ~Shader(); + + int getSerialID() const; + int getLength() const; + ShaderType getShaderType() const; + unsigned short getVersion() const; + + void append(Instruction *instruction); + void declareSampler(int i); + + const Instruction *getInstruction(unsigned int i) const; + int size(unsigned long opcode) const; + static int size(unsigned long opcode, unsigned short version); + + void print(const char *fileName, ...) 
const; + void printInstruction(int index, const char *fileName) const; + + static bool maskContainsComponent(int mask, int component); + static bool swizzleContainsComponent(int swizzle, int component); + static bool swizzleContainsComponentMasked(int swizzle, int component, int mask); + + bool containsDynamicBranching() const; + bool containsBreakInstruction() const; + bool containsContinueInstruction() const; + bool containsLeaveInstruction() const; + bool usesSampler(int i) const; + + struct Semantic + { + Semantic(unsigned char usage = 0xFF, unsigned char index = 0xFF) : usage(usage), index(index), centroid(false) + { + } + + bool operator==(const Semantic &semantic) const + { + return usage == semantic.usage && index == semantic.index; + } + + bool active() const + { + return usage != 0xFF; + } + + unsigned char usage; + unsigned char index; + bool centroid; + }; + + virtual void analyze() = 0; + + // FIXME: Private + unsigned int dirtyConstantsF; + unsigned int dirtyConstantsI; + unsigned int dirtyConstantsB; + + bool dynamicallyIndexedTemporaries; + bool dynamicallyIndexedInput; + bool dynamicallyIndexedOutput; + + protected: + void parse(const unsigned long *token); + + void analyzeDirtyConstants(); + void analyzeDynamicBranching(); + void analyzeSamplers(); + void analyzeCallSites(); + void analyzeDynamicIndexing(); + void markFunctionAnalysis(int functionLabel, Analysis flag); + + ShaderType shaderType; + + union + { + unsigned short version; + + struct + { + unsigned char minorVersion; + unsigned char majorVersion; + }; + }; + + std::vector<Instruction*> instruction; + + unsigned short usedSamplers; // Bit flags + + private: + const int serialID; + static volatile int serialCounter; + + bool dynamicBranching; + bool containsBreak; + bool containsContinue; + bool containsLeave; + }; +} + +#endif // sw_Shader_hpp
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp index 26facac..1fc2b2a 100644 --- a/src/Shader/ShaderCore.cpp +++ b/src/Shader/ShaderCore.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -11,11 +11,459 @@ #include "ShaderCore.hpp" -#include "Debug.hpp" +#include "Renderer/Renderer.hpp" +#include "Common/Debug.hpp" namespace sw { - void ShaderCore::mov(Color4f &dst, Color4f &src, bool floorToInteger) + extern TranscendentalPrecision logPrecision; + extern TranscendentalPrecision expPrecision; + extern TranscendentalPrecision rcpPrecision; + extern TranscendentalPrecision rsqPrecision; + + Vector4i::Vector4i() + { + } + + Vector4i::Vector4i(unsigned short x, unsigned short y, unsigned short z, unsigned short w) + { + this->x = Short4(x); + this->y = Short4(y); + this->z = Short4(z); + this->w = Short4(w); + } + + Vector4i::Vector4i(const Vector4i &rhs) + { + x = rhs.x; + y = rhs.y; + z = rhs.z; + w = rhs.w; + } + + Vector4i &Vector4i::operator=(const Vector4i &rhs) + { + x = rhs.x; + y = rhs.y; + z = rhs.z; + w = rhs.w; + + return *this; + } + + Short4 &Vector4i::operator[](int i) + { + switch(i) + { + case 0: return x; + case 1: return y; + case 2: return z; + case 3: return w; + } + + return x; + } + + Vector4f::Vector4f() + { + } + + Vector4f::Vector4f(float x, float y, float z, float w) + { + this->x = Float4(x); + this->y = Float4(y); + this->z = Float4(z); + this->w = Float4(w); + } + + Vector4f::Vector4f(const Vector4f &rhs) + { + x = rhs.x; + y = rhs.y; + z = rhs.z; + w = rhs.w; + } + + Vector4f &Vector4f::operator=(const Vector4f &rhs) + { + x = rhs.x; + y = rhs.y; + z = rhs.z; + w = rhs.w; + + return *this; + } + + Float4 &Vector4f::operator[](int i) + { + switch(i) + { + case 0: return x; + case 1: return y; + case 2: return z; + case 3: return w; + } + + return x; + } + + Float4 exponential2(RValue<Float4> x, bool pp) + { + 
Float4 x0; + Float4 x1; + Int4 x2; + + x0 = x; + + x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f + x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f + x1 = x0; + x1 -= Float4(0.5f); + x2 = RoundInt(x1); + x1 = Float4(x2); + x2 += Int4(0x0000007F); // 127 + x2 = x2 << 23; + x0 -= x1; + x1 = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f + x1 *= x0; + x1 += As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f + x1 *= x0; + x1 += As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f + x1 *= x0; + x1 += As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f + x1 *= x0; + x1 += As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f + x1 *= x0; + x1 += As<Float4>(Int4(0x3F7FFFFF)); // 9.9999994e-1f + x1 *= As<Float4>(x2); + + return x1; + } + + Float4 logarithm2(RValue<Float4> x, bool absolute, bool pp) + { + Float4 x0; + Float4 x1; + Float4 x2; + Float4 x3; + + x0 = x; + + x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000)); + x1 = As<Float4>(As<UInt4>(x1) >> 8); + x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f))); + x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f; + x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); + + x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f); + x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f); + x2 /= x3; + + x1 += (x0 - Float4(1.0f)) * x2; + + return x1; + } + + Float4 exponential(RValue<Float4> x, bool pp) + { + // FIXME: Propagate the constant + return exponential2(Float4(1.44269541f) * x, pp); // 1/ln(2) + } + + Float4 logarithm(RValue<Float4> x, bool absolute, bool pp) + { + // FIXME: Propagate the constant + return Float4(6.93147181e-1f) * logarithm2(x, absolute, pp); // ln(2) + } + + Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp) + { + Float4 log = logarithm2(x, true, pp); + log *= y; + return exponential2(log, pp); + } + + Float4 reciprocal(RValue<Float4> 
x, bool pp, bool finite) + { + Float4 rcp; + + if(!pp && rcpPrecision >= WHQL) + { + rcp = Float4(1.0f) / x; + } + else + { + rcp = Rcp_pp(x); + + if(!pp) + { + rcp = (rcp + rcp) - (x * rcp * rcp); + } + } + + if(finite) + { + int big = 0x7F7FFFFF; + rcp = Min(rcp, Float4((float&)big)); + } + + return rcp; + } + + Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp) + { + Float4 abs = x; + + if(absolute) + { + abs = Abs(abs); + } + + Float4 rsq; + + if(!pp && rsqPrecision >= IEEE) + { + rsq = Float4(1.0f) / Sqrt(abs); + } + else + { + rsq = RcpSqrt_pp(abs); + + if(!pp) + { + rsq = rsq * (Float4(3.0f) - rsq * rsq * abs) * Float4(0.5f); + } + } + + int big = 0x7F7FFFFF; + rsq = Min(rsq, Float4((float&)big)); + + return rsq; + } + + Float4 modulo(RValue<Float4> x, RValue<Float4> y) + { + return x - y * Floor(x / y); + } + + Float4 sine_pi(RValue<Float4> x, bool pp) + { + const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2 + const Float4 B = Float4(1.27323954e+0f); // 4/pi + const Float4 C = Float4(7.75160950e-1f); + const Float4 D = Float4(2.24839049e-1f); + + // Parabola approximating sine + Float4 sin = x * (Abs(x) * A + B); + + // Improve precision from 0.06 to 0.001 + if(true) + { + sin = sin * (Abs(sin) * D + C); + } + + return sin; + } + + Float4 cosine_pi(RValue<Float4> x, bool pp) + { + // cos(x) = sin(x + pi/2) + Float4 y = x + Float4(1.57079632e+0f); + + // Wrap around + y -= As<Float4>(CmpNLT(y, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f))); + + return sine_pi(y, pp); + } + + Float4 sine(RValue<Float4> x, bool pp) + { + // Reduce to [-0.5, 0.5] range + Float4 y = x * Float4(1.59154943e-1f); // 1/2pi + y = y - Round(y); + + const Float4 A = Float4(-16.0f); + const Float4 B = Float4(8.0f); + const Float4 C = Float4(7.75160950e-1f); + const Float4 D = Float4(2.24839049e-1f); + + // Parabola approximating sine + Float4 sin = y * (Abs(y) * A + B); + + // Improve precision from 0.06 to 0.001 + if(true) + { + sin = sin * (Abs(sin) * 
D + C); + } + + return sin; + } + + Float4 cosine(RValue<Float4> x, bool pp) + { + // cos(x) = sin(x + pi/2) + Float4 y = x + Float4(1.57079632e+0f); + return sine(y, pp); + } + + Float4 tangent(RValue<Float4> x, bool pp) + { + return sine(x, pp) / cosine(x, pp); + } + + Float4 arccos(RValue<Float4> x, bool pp) + { + // pi/2 - arcsin(x) + return Float4(1.57079632e+0f) - arcsin(x); + } + + Float4 arcsin(RValue<Float4> x, bool pp) + { + // x*(pi/2-sqrt(1-x*x)*pi/5) + return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f)); + } + + Float4 arctan(RValue<Float4> x, bool pp) + { + Int4 O = CmpNLT(Abs(x), Float4(1.0f)); + Float4 y = As<Float4>(O & As<Int4>(Float4(1.0f) / x) | ~O & As<Int4>(x)); // FIXME: Vector select + + // Approximation of atan in [-1..1] + Float4 theta = y * (Float4(-0.27f) * Abs(y) + Float4(1.05539816f)); + + // +/-pi/2 depending on sign of x + Float4 sgnPi_2 = As<Float4>(As<Int4>(Float4(1.57079632e+0f)) ^ (As<Int4>(x) & Int4(0x80000000))); + + theta = As<Float4>(O & As<Int4>(sgnPi_2 - theta) | ~O & As<Int4>(theta)); // FIXME: Vector select + + return theta; + } + + Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp) + { + // Rotate to upper semicircle when in lower semicircle + Int4 S = CmpLT(y, Float4(0.0f)); + Float4 theta = As<Float4>(S & As<Int4>(Float4(-3.14159265e+0f))); // -pi + Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x)); + Float4 y0 = Abs(y); + + // Rotate to right quadrant when in left quadrant + Int4 Q = CmpLT(x0, Float4(0.0f)); + theta += As<Float4>(Q & As<Int4>(Float4(1.57079632e+0f))); // pi/2 + Float4 x1 = As<Float4>(Q & As<Int4>(y0) | ~Q & As<Int4>(x0)); // FIXME: Vector select + Float4 y1 = As<Float4>(Q & As<Int4>(-x0) | ~Q & As<Int4>(y0)); // FIXME: Vector select + + // Rotate to first octant when in second octant + Int4 O = CmpNLT(y1, x1); + theta += As<Float4>(O & As<Int4>(Float4(7.85398163e-1f))); // pi/4 + Float4 x2 = As<Float4>(O & As<Int4>(Float4(7.07106781e-1f) 
* x1 + Float4(7.07106781e-1f) * y1) | ~O & As<Int4>(x1)); // sqrt(2)/2 // FIXME: Vector select + Float4 y2 = As<Float4>(O & As<Int4>(Float4(7.07106781e-1f) * y1 - Float4(7.07106781e-1f) * x1) | ~O & As<Int4>(y1)); // FIXME: Vector select + + // Approximation of atan in [0..1] + Float4 y_x = y2 / x2; + theta += y_x * (Float4(-0.27f) * y_x + Float4(1.05539816f)); + + return theta; + } + + Float4 dot2(Vector4f &v0, Vector4f &v1) + { + return v0.x * v1.x + v0.y * v1.y; + } + + Float4 dot3(Vector4f &v0, Vector4f &v1) + { + return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; + } + + Float4 dot4(Vector4f &v0, Vector4f &v1) + { + return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w; + } + + void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) + { + Int2 tmp0 = UnpackHigh(row0, row1); + Int2 tmp1 = UnpackHigh(row2, row3); + Int2 tmp2 = UnpackLow(row0, row1); + Int2 tmp3 = UnpackLow(row2, row3); + + row0 = As<Short4>(UnpackLow(tmp2, tmp3)); + row1 = As<Short4>(UnpackHigh(tmp2, tmp3)); + row2 = As<Short4>(UnpackLow(tmp0, tmp1)); + row3 = As<Short4>(UnpackHigh(tmp0, tmp1)); + } + + void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) + { + Float4 tmp0 = UnpackLow(row0, row1); + Float4 tmp1 = UnpackLow(row2, row3); + Float4 tmp2 = UnpackHigh(row0, row1); + Float4 tmp3 = UnpackHigh(row2, row3); + + row0 = Float4(tmp0.xy, tmp1.xy); + row1 = Float4(tmp0.zw, tmp1.zw); + row2 = Float4(tmp2.xy, tmp3.xy); + row3 = Float4(tmp2.zw, tmp3.zw); + } + + void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) + { + Float4 tmp0 = UnpackLow(row0, row1); + Float4 tmp1 = UnpackLow(row2, row3); + Float4 tmp2 = UnpackHigh(row0, row1); + Float4 tmp3 = UnpackHigh(row2, row3); + + row0 = Float4(tmp0.xy, tmp1.xy); + row1 = Float4(tmp0.zw, tmp1.zw); + row2 = Float4(tmp2.xy, tmp3.xy); + } + + void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) + { + Float4 tmp0 = UnpackLow(row0, row1); + Float4 tmp1 = UnpackLow(row2, 
row3); + + row0 = Float4(tmp0.xy, tmp1.xy); + row1 = Float4(tmp0.zw, tmp1.zw); + } + + void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) + { + Float4 tmp0 = UnpackLow(row0, row1); + Float4 tmp1 = UnpackLow(row2, row3); + + row0 = Float4(tmp0.xy, tmp1.xy); + } + + void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) + { + row2 = UnpackHigh(row0, row1); // Upper interleave first: it must read row0/row1 before they are overwritten below + row3 = Float4(row2.zw, row3.zw); + row0 = UnpackLow(row0, row1); + row1 = Float4(row0.zw, row1.zw); + } + + void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) + { + row0 = UnpackLow(row2, row3); + row1 = Float4(row0.zw, row1.zw); + row2 = UnpackHigh(row2, row3); + row3 = Float4(row2.zw, row3.zw); + } + + void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) + { + switch(N) + { + case 1: transpose4x1(row0, row1, row2, row3); break; + case 2: transpose4x2(row0, row1, row2, row3); break; + case 3: transpose4x3(row0, row1, row2, row3); break; + case 4: transpose4x4(row0, row1, row2, row3); break; + } + } + + void ShaderCore::mov(Vector4f &dst, Vector4f &src, bool floorToInteger) { if(floorToInteger) { @@ -27,7 +475,23 @@ } } - void ShaderCore::add(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::f2b(Vector4f &dst, Vector4f &src) + { + dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f))); + dst.y = As<Float4>(CmpNEQ(src.y, Float4(0.0f))); + dst.z = As<Float4>(CmpNEQ(src.z, Float4(0.0f))); + dst.w = As<Float4>(CmpNEQ(src.w, Float4(0.0f))); + } + + void ShaderCore::b2f(Vector4f &dst, Vector4f &src) + { + dst.x = As<Float4>(As<Int4>(src.x) & As<Int4>(Float4(1.0f))); + dst.y = As<Float4>(As<Int4>(src.y) & As<Int4>(Float4(1.0f))); + dst.z = As<Float4>(As<Int4>(src.z) & As<Int4>(Float4(1.0f))); + dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f))); + } + + void ShaderCore::add(Vector4f &dst, Vector4f &src0, Vector4f &src1) { dst.x = src0.x + src1.x; dst.y = src0.y + src1.y; @@ -35,7 +499,7 @@ dst.w = src0.w +
src1.w; } - void ShaderCore::sub(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::sub(Vector4f &dst, Vector4f &src0, Vector4f &src1) { dst.x = src0.x - src1.x; dst.y = src0.y - src1.y; @@ -43,7 +507,7 @@ dst.w = src0.w - src1.w; } - void ShaderCore::mad(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2) + void ShaderCore::mad(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2) { dst.x = src0.x * src1.x + src2.x; dst.y = src0.y * src1.y + src2.y; @@ -51,7 +515,7 @@ dst.w = src0.w * src1.w + src2.w; } - void ShaderCore::mul(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::mul(Vector4f &dst, Vector4f &src0, Vector4f &src1) { dst.x = src0.x * src1.x; dst.y = src0.y * src1.y; @@ -59,7 +523,7 @@ dst.w = src0.w * src1.w; } - void ShaderCore::rcp(Color4f &dst, Color4f &src, bool pp) + void ShaderCore::rcpx(Vector4f &dst, Vector4f &src, bool pp) { Float4 rcp = reciprocal(src.x, pp, true); @@ -69,17 +533,126 @@ dst.w = rcp; } - void ShaderCore::rsq(Color4f &dst, Color4f &src, bool pp) + void ShaderCore::div(Vector4f &dst, Vector4f &src0, Vector4f &src1) + { + dst.x = src0.x / src1.x; + dst.y = src0.y / src1.y; + dst.z = src0.z / src1.z; + dst.w = src0.w / src1.w; + } + + void ShaderCore::mod(Vector4f &dst, Vector4f &src0, Vector4f &src1) + { + dst.x = modulo(src0.x, src1.x); + dst.y = modulo(src0.y, src1.y); + dst.z = modulo(src0.z, src1.z); + dst.w = modulo(src0.w, src1.w); + } + + void ShaderCore::rsqx(Vector4f &dst, Vector4f &src, bool pp) { Float4 rsq = reciprocalSquareRoot(src.x, true, pp); - dst.r = rsq; - dst.g = rsq; - dst.b = rsq; - dst.a = rsq; + dst.x = rsq; + dst.y = rsq; + dst.z = rsq; + dst.w = rsq; } - void ShaderCore::dp3(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::sqrt(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = Sqrt(src.x); + dst.y = Sqrt(src.y); + dst.z = Sqrt(src.z); + dst.w = Sqrt(src.w); + } + + void ShaderCore::rsq(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = 
reciprocalSquareRoot(src.x, false, pp); + dst.y = reciprocalSquareRoot(src.y, false, pp); + dst.z = reciprocalSquareRoot(src.z, false, pp); + dst.w = reciprocalSquareRoot(src.w, false, pp); + } + + void ShaderCore::len2(Float4 &dst, Vector4f &src, bool pp) + { + dst = Sqrt(dot2(src, src)); + } + + void ShaderCore::len3(Float4 &dst, Vector4f &src, bool pp) + { + dst = Sqrt(dot3(src, src)); + } + + void ShaderCore::len4(Float4 &dst, Vector4f &src, bool pp) + { + dst = Sqrt(dot4(src, src)); + } + + void ShaderCore::dist1(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp) + { + dst = Abs(src0.x - src1.x); + } + + void ShaderCore::dist2(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp) + { + Float4 dx = src0.x - src1.x; + Float4 dy = src0.y - src1.y; + Float4 dot2 = dx * dx + dy * dy; + dst = Sqrt(dot2); + } + + void ShaderCore::dist3(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp) + { + Float4 dx = src0.x - src1.x; + Float4 dy = src0.y - src1.y; + Float4 dz = src0.z - src1.z; + Float4 dot3 = dx * dx + dy * dy + dz * dz; + dst = Sqrt(dot3); + } + + void ShaderCore::dist4(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp) + { + Float4 dx = src0.x - src1.x; + Float4 dy = src0.y - src1.y; + Float4 dz = src0.z - src1.z; + Float4 dw = src0.w - src1.w; + Float4 dot4 = dx * dx + dy * dy + dz * dz + dw * dw; + dst = Sqrt(dot4); + } + + void ShaderCore::dp1(Vector4f &dst, Vector4f &src0, Vector4f &src1) + { + Float4 t = src0.x * src1.x; + + dst.x = t; + dst.y = t; + dst.z = t; + dst.w = t; + } + + void ShaderCore::dp2(Vector4f &dst, Vector4f &src0, Vector4f &src1) + { + Float4 t = dot2(src0, src1); + + dst.x = t; + dst.y = t; + dst.z = t; + dst.w = t; + } + + void ShaderCore::dp2add(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2) + { + Float4 t = dot2(src0, src1) + src2.x; + + dst.x = t; + dst.y = t; + dst.z = t; + dst.w = t; + } + + void ShaderCore::dp3(Vector4f &dst, Vector4f &src0, Vector4f &src1) { Float4 dot = dot3(src0, src1); @@ -89,7 +662,7 
@@ dst.w = dot; } - void ShaderCore::dp4(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::dp4(Vector4f &dst, Vector4f &src0, Vector4f &src1) { Float4 dot = dot4(src0, src1); @@ -99,7 +672,7 @@ dst.w = dot; } - void ShaderCore::min(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::min(Vector4f &dst, Vector4f &src0, Vector4f &src1) { dst.x = Min(src0.x, src1.x); dst.y = Min(src0.y, src1.y); @@ -107,7 +680,7 @@ dst.w = Min(src0.w, src1.w); } - void ShaderCore::max(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::max(Vector4f &dst, Vector4f &src0, Vector4f &src1) { dst.x = Max(src0.x, src1.x); dst.y = Max(src0.y, src1.y); @@ -115,39 +688,25 @@ dst.w = Max(src0.w, src1.w); } - void ShaderCore::slt(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::slt(Vector4f &dst, Vector4f &src0, Vector4f &src1) { - Int4 xMask = As<Int4>(CmpLT(src0.x, src1.x)); - Int4 yMask = As<Int4>(CmpLT(src0.y, src1.y)); - Int4 zMask = As<Int4>(CmpLT(src0.z, src1.z)); - Int4 wMask = As<Int4>(CmpLT(src0.w, src1.w)); - - Int4 iOne = As<Int4>(Float4(1, 1, 1, 1)); - - dst.x = As<Float4>(xMask & iOne); - dst.y = As<Float4>(yMask & iOne); - dst.z = As<Float4>(zMask & iOne); - dst.w = As<Float4>(wMask & iOne); + dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f))); + dst.y = As<Float4>(As<Int4>(CmpLT(src0.y, src1.y)) & As<Int4>(Float4(1.0f))); + dst.z = As<Float4>(As<Int4>(CmpLT(src0.z, src1.z)) & As<Int4>(Float4(1.0f))); + dst.w = As<Float4>(As<Int4>(CmpLT(src0.w, src1.w)) & As<Int4>(Float4(1.0f))); } - void ShaderCore::sge(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::step(Vector4f &dst, Vector4f &edge, Vector4f &x) { - Int4 xMask = As<Int4>(CmpNLT(src0.x, src1.x)); - Int4 yMask = As<Int4>(CmpNLT(src0.y, src1.y)); - Int4 zMask = As<Int4>(CmpNLT(src0.z, src1.z)); - Int4 wMask = As<Int4>(CmpNLT(src0.w, src1.w)); - - Int4 iOne = As<Int4>(Float4(1, 1, 1, 1)); - - dst.x = As<Float4>(xMask & iOne); - dst.y = 
As<Float4>(yMask & iOne); - dst.z = As<Float4>(zMask & iOne); - dst.w = As<Float4>(wMask & iOne); + dst.x = As<Float4>(CmpNLT(x.x, edge.x) & As<Int4>(Float4(1.0f))); + dst.y = As<Float4>(CmpNLT(x.y, edge.y) & As<Int4>(Float4(1.0f))); + dst.z = As<Float4>(CmpNLT(x.z, edge.z) & As<Int4>(Float4(1.0f))); + dst.w = As<Float4>(CmpNLT(x.w, edge.w) & As<Int4>(Float4(1.0f))); } - void ShaderCore::exp(Color4f &dst, Color4f &src, bool pp) + void ShaderCore::exp2x(Vector4f &dst, Vector4f &src, bool pp) { - Float4 exp = exponential(src.x, pp); + Float4 exp = exponential2(src.x, pp); dst.x = exp; dst.y = exp; @@ -155,9 +714,25 @@ dst.w = exp; } - void ShaderCore::log(Color4f &dst, Color4f &src, bool pp) + void ShaderCore::exp2(Vector4f &dst, Vector4f &src, bool pp) { - Float4 log = logarithm(src.x, true, pp); + dst.x = exponential2(src.x, pp); + dst.y = exponential2(src.y, pp); + dst.z = exponential2(src.z, pp); + dst.w = exponential2(src.w, pp); + } + + void ShaderCore::exp(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = exponential(src.x, pp); + dst.y = exponential(src.y, pp); + dst.z = exponential(src.z, pp); + dst.w = exponential(src.w, pp); + } + + void ShaderCore::log2x(Vector4f &dst, Vector4f &src, bool pp) + { + Float4 log = logarithm2(src.x, true, pp); dst.x = log; dst.y = log; @@ -165,33 +740,50 @@ dst.w = log; } - void ShaderCore::lit(Color4f &dst, Color4f &src) + void ShaderCore::log2(Vector4f &dst, Vector4f &src, bool pp) { - dst.x = Float4(1.0f, 1.0f, 1.0f, 1.0f); - dst.y = Max(src.x, Float4(0.0f, 0.0f, 0.0f, 0.0f)); + dst.x = logarithm2(src.x, pp); + dst.y = logarithm2(src.y, pp); + dst.z = logarithm2(src.z, pp); + dst.w = logarithm2(src.w, pp); + } + + void ShaderCore::log(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = logarithm(src.x, false, pp); + dst.y = logarithm(src.y, false, pp); + dst.z = logarithm(src.z, false, pp); + dst.w = logarithm(src.w, false, pp); + } + + void ShaderCore::lit(Vector4f &dst, Vector4f &src) + { + dst.x = Float4(1.0f); + 
dst.y = Max(src.x, Float4(0.0f)); Float4 pow; pow = src.w; - pow = Min(pow, Float4(127.9961f, 127.9961f, 127.9961f, 127.9961f)); - pow = Max(pow, Float4(-127.9961f, -127.9961f, -127.9961f, -127.9961f)); + pow = Min(pow, Float4(127.9961f)); + pow = Max(pow, Float4(-127.9961f)); dst.z = power(src.y, pow); - dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f, 0.0f, 0.0f, 0.0f))); - dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f, 0.0f, 0.0f, 0.0f))); + dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f))); + dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f))); - dst.w = Float4(1.0f, 1.0f, 1.0f, 1.0f); + dst.w = Float4(1.0f); } - void ShaderCore::dst(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::att(Vector4f &dst, Vector4f &src0, Vector4f &src1) { + // Computes attenuation factors (1, d, d^2, 1/d) assuming src0 = d^2, src1 = 1/d dst.x = 1; dst.y = src0.y * src1.y; dst.z = src0.z; dst.w = src1.w; } - void ShaderCore::lrp(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2) + void ShaderCore::lrp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2) { dst.x = src0.x * (src1.x - src2.x) + src2.x; dst.y = src0.y * (src1.y - src2.y) + src2.y; @@ -199,15 +791,47 @@ dst.w = src0.w * (src1.w - src2.w) + src2.w; } - void ShaderCore::frc(Color4f &dst, Color4f &src) + void ShaderCore::smooth(Vector4f &dst, Vector4f &edge0, Vector4f &edge1, Vector4f &x) { - dst.x = Fraction(src.x); - dst.y = Fraction(src.y); - dst.z = Fraction(src.z); - dst.w = Fraction(src.w); + Float4 tx = Min(Max((x.x - edge0.x) / (edge1.x - edge0.x), Float4(0.0f)), Float4(1.0f)); dst.x = tx * tx * (Float4(3.0f) - Float4(2.0f) * tx); + Float4 ty = Min(Max((x.y - edge0.y) / (edge1.y - edge0.y), Float4(0.0f)), Float4(1.0f)); dst.y = ty * ty * (Float4(3.0f) - Float4(2.0f) * ty); + Float4 tz = Min(Max((x.z - edge0.z) / (edge1.z - edge0.z), Float4(0.0f)), Float4(1.0f)); dst.z = tz * tz * (Float4(3.0f) - Float4(2.0f) * tz); + Float4 
tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw); } - void ShaderCore::pow(Color4f &dst, Color4f &src0, Color4f &src1, bool pp) + void ShaderCore::frc(Vector4f &dst, Vector4f &src) + { + dst.x = Frac(src.x); + dst.y = Frac(src.y); + dst.z = Frac(src.z); + dst.w = Frac(src.w); + } + + void ShaderCore::trunc(Vector4f &dst, Vector4f &src) + { + dst.x = Trunc(src.x); + dst.y = Trunc(src.y); + dst.z = Trunc(src.z); + dst.w = Trunc(src.w); + } + + void ShaderCore::floor(Vector4f &dst, Vector4f &src) + { + dst.x = Floor(src.x); + dst.y = Floor(src.y); + dst.z = Floor(src.z); + dst.w = Floor(src.w); + } + + void ShaderCore::ceil(Vector4f &dst, Vector4f &src) + { + dst.x = Ceil(src.x); + dst.y = Ceil(src.y); + dst.z = Ceil(src.z); + dst.w = Ceil(src.w); + } + + void ShaderCore::powx(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp) { Float4 pow = power(src0.x, src1.x, pp); @@ -217,14 +841,136 @@ dst.w = pow; } - void ShaderCore::crs(Color4f &dst, Color4f &src0, Color4f &src1) + void ShaderCore::pow(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp) + { + dst.x = power(src0.x, src1.x, pp); + dst.y = power(src0.y, src1.y, pp); + dst.z = power(src0.z, src1.z, pp); + dst.w = power(src0.w, src1.w, pp); + } + + void ShaderCore::crs(Vector4f &dst, Vector4f &src0, Vector4f &src1) { dst.x = src0.y * src1.z - src0.z * src1.y; dst.y = src0.z * src1.x - src0.x * src1.z; dst.z = src0.x * src1.y - src0.y * src1.x; } - void ShaderCore::sgn(Color4f &dst, Color4f &src) + void ShaderCore::forward1(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref) + { + Int4 flip = CmpNLT(Nref.x * I.x, Float4(0.0f)) & Int4(0x80000000); + + dst.x = As<Float4>(flip ^ As<Int4>(N.x)); + } + + void ShaderCore::forward2(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref) + { + Int4 flip = CmpNLT(dot2(Nref, I), Float4(0.0f)) & Int4(0x80000000); + + dst.x = As<Float4>(flip ^ As<Int4>(N.x)); + dst.y = 
As<Float4>(flip ^ As<Int4>(N.y)); + } + + void ShaderCore::forward3(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref) + { + Int4 flip = CmpNLT(dot3(Nref, I), Float4(0.0f)) & Int4(0x80000000); + + dst.x = As<Float4>(flip ^ As<Int4>(N.x)); + dst.y = As<Float4>(flip ^ As<Int4>(N.y)); + dst.z = As<Float4>(flip ^ As<Int4>(N.z)); + } + + void ShaderCore::forward4(Vector4f &dst, Vector4f &N, Vector4f &I, Vector4f &Nref) + { + Int4 flip = CmpNLT(dot4(Nref, I), Float4(0.0f)) & Int4(0x80000000); + + dst.x = As<Float4>(flip ^ As<Int4>(N.x)); + dst.y = As<Float4>(flip ^ As<Int4>(N.y)); + dst.z = As<Float4>(flip ^ As<Int4>(N.z)); + dst.w = As<Float4>(flip ^ As<Int4>(N.w)); + } + + void ShaderCore::reflect1(Vector4f &dst, Vector4f &I, Vector4f &N) + { + Float4 d = N.x * I.x; + + dst.x = I.x - Float4(2.0f) * d * N.x; + } + + void ShaderCore::reflect2(Vector4f &dst, Vector4f &I, Vector4f &N) + { + Float4 d = dot2(N, I); + + dst.x = I.x - Float4(2.0f) * d * N.x; + dst.y = I.y - Float4(2.0f) * d * N.y; + } + + void ShaderCore::reflect3(Vector4f &dst, Vector4f &I, Vector4f &N) + { + Float4 d = dot3(N, I); + + dst.x = I.x - Float4(2.0f) * d * N.x; + dst.y = I.y - Float4(2.0f) * d * N.y; + dst.z = I.z - Float4(2.0f) * d * N.z; + } + + void ShaderCore::reflect4(Vector4f &dst, Vector4f &I, Vector4f &N) + { + Float4 d = dot4(N, I); + + dst.x = I.x - Float4(2.0f) * d * N.x; + dst.y = I.y - Float4(2.0f) * d * N.y; + dst.z = I.z - Float4(2.0f) * d * N.z; + dst.w = I.w - Float4(2.0f) * d * N.w; + } + + void ShaderCore::refract1(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta) + { + Float4 d = N.x * I.x; + Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); + Int4 pos = CmpNLT(k, Float4(0.0f)); + Float4 t = (eta * d + Sqrt(k)); + + dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); + } + + void ShaderCore::refract2(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta) + { + Float4 d = dot2(N, I); + Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); + Int4 
pos = CmpNLT(k, Float4(0.0f)); + Float4 t = (eta * d + Sqrt(k)); + + dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); + dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); + } + + void ShaderCore::refract3(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta) + { + Float4 d = dot3(N, I); + Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); + Int4 pos = CmpNLT(k, Float4(0.0f)); + Float4 t = (eta * d + Sqrt(k)); + + dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); + dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); + dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z)); + } + + void ShaderCore::refract4(Vector4f &dst, Vector4f &I, Vector4f &N, Float4 &eta) + { + Float4 d = dot4(N, I); + Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); + Int4 pos = CmpNLT(k, Float4(0.0f)); + Float4 t = (eta * d + Sqrt(k)); + + dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); + dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); + dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z)); + dst.w = As<Float4>(pos & As<Int4>(eta * I.w - t * N.w)); + } + + void ShaderCore::sgn(Vector4f &dst, Vector4f &src) { sgn(dst.x, src.x); sgn(dst.y, src.y); @@ -232,15 +978,26 @@ sgn(dst.w, src.w); } - void ShaderCore::abs(Color4f &dst, Color4f &src) + void ShaderCore::abs(Vector4f &dst, Vector4f &src) { dst.x = Abs(src.x); dst.y = Abs(src.y); dst.z = Abs(src.z); dst.w = Abs(src.w); } + + void ShaderCore::nrm2(Vector4f &dst, Vector4f &src, bool pp) + { + Float4 dot = dot2(src, src); + Float4 rsq = reciprocalSquareRoot(dot, false, pp); - void ShaderCore::nrm(Color4f &dst, Color4f &src, bool pp) + dst.x = src.x * rsq; + dst.y = src.y * rsq; + dst.z = src.z * rsq; + dst.w = src.w * rsq; + } + + void ShaderCore::nrm3(Vector4f &dst, Vector4f &src, bool pp) { Float4 dot = dot3(src, src); Float4 rsq = reciprocalSquareRoot(dot, false, pp); @@ -250,42 +1007,99 @@ dst.z = src.z * rsq; dst.w = src.w * rsq; } - - void ShaderCore::sincos(Color4f 
&dst, Color4f &src, bool pp) + + void ShaderCore::nrm4(Vector4f &dst, Vector4f &src, bool pp) { - Float4 tmp0; - Float4 tmp1; + Float4 dot = dot4(src, src); + Float4 rsq = reciprocalSquareRoot(dot, false, pp); - tmp0 = src.x; - - // cos(x) = sin(x + pi/2) - tmp0 += Float4(1.57079632e+0f); - tmp1 = As<Float4>(CmpNLT(tmp0, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f))); - tmp0 -= tmp1; - - dst.x = sine(tmp0, pp); - dst.y = sine(src.x, pp); + dst.x = src.x * rsq; + dst.y = src.y * rsq; + dst.z = src.z * rsq; + dst.w = src.w * rsq; + } + + void ShaderCore::sincos(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = cosine_pi(src.x, pp); + dst.y = sine_pi(src.x, pp); } - void ShaderCore::expp(Color4f &dst, Color4f &src, unsigned short version) + void ShaderCore::cos(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = cosine(src.x, pp); + dst.y = cosine(src.y, pp); + dst.z = cosine(src.z, pp); + dst.w = cosine(src.w, pp); + } + + void ShaderCore::sin(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = sine(src.x, pp); + dst.y = sine(src.y, pp); + dst.z = sine(src.z, pp); + dst.w = sine(src.w, pp); + } + + void ShaderCore::tan(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = tangent(src.x, pp); + dst.y = tangent(src.y, pp); + dst.z = tangent(src.z, pp); + dst.w = tangent(src.w, pp); + } + + void ShaderCore::acos(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = arccos(src.x, pp); + dst.y = arccos(src.y, pp); + dst.z = arccos(src.z, pp); + dst.w = arccos(src.w, pp); + } + + void ShaderCore::asin(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = arcsin(src.x, pp); + dst.y = arcsin(src.y, pp); + dst.z = arcsin(src.z, pp); + dst.w = arcsin(src.w, pp); + } + + void ShaderCore::atan(Vector4f &dst, Vector4f &src, bool pp) + { + dst.x = arctan(src.x, pp); + dst.y = arctan(src.y, pp); + dst.z = arctan(src.z, pp); + dst.w = arctan(src.w, pp); + } + + void ShaderCore::atan2(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp) + { + dst.x = arctan(src0.x, 
src1.x, pp); + dst.y = arctan(src0.y, src1.y, pp); + dst.z = arctan(src0.z, src1.z, pp); + dst.w = arctan(src0.w, src1.w, pp); + } + + void ShaderCore::expp(Vector4f &dst, Vector4f &src, unsigned short version) { if(version < 0x0200) { - Float4 frc = Fraction(src.x); + Float4 frc = Frac(src.x); Float4 floor = src.x - frc; - dst.x = exponential(floor, true); + dst.x = exponential2(floor, true); dst.y = frc; - dst.z = exponential(src.x, true); - dst.w = Float4(1.0f, 1.0f, 1.0f, 1.0f); + dst.z = exponential2(src.x, true); + dst.w = Float4(1.0f); } else // Version >= 2.0 { - exp(dst, src, true); // FIXME: 10-bit precision suffices + exp2x(dst, src, true); // FIXME: 10-bit precision suffices } } - void ShaderCore::logp(Color4f &dst, Color4f &src, unsigned short version) + void ShaderCore::logp(Vector4f &dst, Vector4f &src, unsigned short version) { if(version < 0x0200) { @@ -298,92 +1112,109 @@ tmp1 = tmp0; // X component - r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127, 127, 127, 127); + r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127); dst.x = Float4(r); // Y component dst.y = As<Float4>((As<Int4>(tmp1) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); // Z component - dst.z = logarithm(src.x, true, true); + dst.z = logarithm2(src.x, true, true); // W component dst.w = 1.0f; } else { - log(dst, src, true); + log2x(dst, src, true); } } - void ShaderCore::cmp(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2) + void ShaderCore::cmp0(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2) { - cmp(dst.x, src0.x, src1.x, src2.x); - cmp(dst.y, src0.y, src1.y, src2.y); - cmp(dst.z, src0.z, src1.z, src2.z); - cmp(dst.w, src0.w, src1.w, src2.w); + cmp0(dst.x, src0.x, src1.x, src2.x); + cmp0(dst.y, src0.y, src1.y, src2.y); + cmp0(dst.z, src0.z, src1.z, src2.z); + cmp0(dst.w, src0.w, src1.w, src2.w); } - - void ShaderCore::dp2add(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2) - { - Float4 t = src0.x * src1.x + src0.y * src1.y + src2.x; - dst.x = t; - dst.y 
= t; - dst.z = t; - dst.w = t; + void ShaderCore::select(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2) + { + select(dst.x, As<Int4>(src0.x), src1.x, src2.x); + select(dst.y, As<Int4>(src0.y), src1.y, src2.y); + select(dst.z, As<Int4>(src0.z), src1.z, src2.z); + select(dst.w, As<Int4>(src0.w), src1.w, src2.w); + } + + void ShaderCore::extract(Float4 &dst, Vector4f &src0, Float4 &src1) + { + select(dst, CmpEQ(src1, Float4(1.0f)), src0.y, src0.x); + select(dst, CmpEQ(src1, Float4(2.0f)), src0.z, dst); + select(dst, CmpEQ(src1, Float4(3.0f)), src0.w, dst); + } + + void ShaderCore::insert(Vector4f &dst, Vector4f &src, Float4 &element, Float4 &index) + { + select(dst.x, CmpEQ(index, Float4(0.0f)), element, src.x); + select(dst.y, CmpEQ(index, Float4(1.0f)), element, src.y); + select(dst.z, CmpEQ(index, Float4(2.0f)), element, src.z); + select(dst.w, CmpEQ(index, Float4(3.0f)), element, src.w); } void ShaderCore::sgn(Float4 &dst, Float4 &src) { - Int4 neg = As<Int4>(CmpLT(src, Float4(0, 0, 0, 0))) & As<Int4>(Float4(-1, -1, -1, -1)); - Int4 pos = As<Int4>(CmpNLT(src, Float4(0, 0, 0, 0))) & As<Int4>(Float4(1, 1, 1, 1)); + Int4 neg = As<Int4>(CmpLT(src, Float4(-0.0f))) & As<Int4>(Float4(-1.0f)); + Int4 pos = As<Int4>(CmpNLE(src, Float4(+0.0f))) & As<Int4>(Float4(1.0f)); dst = As<Float4>(neg | pos); } - void ShaderCore::cmp(Float4 &dst, Float4 &src0, Float4 &src1, Float4 &src2) + void ShaderCore::cmp0(Float4 &dst, Float4 &src0, Float4 &src1, Float4 &src2) { - Int4 pos = CmpNLE(Float4(0.0f, 0.0f, 0.0f, 0.0f), src0); - Int4 t0 = pos & As<Int4>(src2); - Int4 t1 = ~pos & As<Int4>(src1); - dst = As<Float4>(t0 | t1); + Int4 pos = CmpLE(Float4(0.0f), src0); + select(dst, pos, src1, src2); } - void ShaderCore::setp(Color4f &dst, Color4f &src0, Color4f &src1, Control control) + void ShaderCore::select(Float4 &dst, RValue<Int4> src0, Float4 &src1, Float4 &src2) + { + // FIXME: LLVM vector select + dst = As<Float4>(src0 & As<Int4>(src1) | ~src0 & As<Int4>(src2)); + } + 
+ void ShaderCore::cmp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Control control) { switch(control) { - case Op::CONTROL_GT: + case Shader::CONTROL_GT: dst.x = As<Float4>(CmpNLE(src0.x, src1.x)); dst.y = As<Float4>(CmpNLE(src0.y, src1.y)); dst.z = As<Float4>(CmpNLE(src0.z, src1.z)); dst.w = As<Float4>(CmpNLE(src0.w, src1.w)); break; - case Op::CONTROL_EQ: + case Shader::CONTROL_EQ: dst.x = As<Float4>(CmpEQ(src0.x, src1.x)); dst.y = As<Float4>(CmpEQ(src0.y, src1.y)); dst.z = As<Float4>(CmpEQ(src0.z, src1.z)); dst.w = As<Float4>(CmpEQ(src0.w, src1.w)); break; - case Op::CONTROL_GE: + case Shader::CONTROL_GE: dst.x = As<Float4>(CmpNLT(src0.x, src1.x)); dst.y = As<Float4>(CmpNLT(src0.y, src1.y)); dst.z = As<Float4>(CmpNLT(src0.z, src1.z)); dst.w = As<Float4>(CmpNLT(src0.w, src1.w)); break; - case Op::CONTROL_LT: + case Shader::CONTROL_LT: dst.x = As<Float4>(CmpLT(src0.x, src1.x)); dst.y = As<Float4>(CmpLT(src0.y, src1.y)); dst.z = As<Float4>(CmpLT(src0.z, src1.z)); dst.w = As<Float4>(CmpLT(src0.w, src1.w)); break; - case Op::CONTROL_NE: + case Shader::CONTROL_NE: dst.x = As<Float4>(CmpNEQ(src0.x, src1.x)); dst.y = As<Float4>(CmpNEQ(src0.y, src1.y)); dst.z = As<Float4>(CmpNEQ(src0.z, src1.z)); dst.w = As<Float4>(CmpNEQ(src0.w, src1.w)); break; - case Op::CONTROL_LE: + case Shader::CONTROL_LE: dst.x = As<Float4>(CmpLE(src0.x, src1.x)); dst.y = As<Float4>(CmpLE(src0.y, src1.y)); dst.z = As<Float4>(CmpLE(src0.z, src1.z)); @@ -393,4 +1224,82 @@ ASSERT(false); } } + + void ShaderCore::icmp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Control control) + { + switch(control) + { + case Shader::CONTROL_GT: + dst.x = As<Float4>(CmpNLE(As<Int4>(src0.x), As<Int4>(src1.x))); + dst.y = As<Float4>(CmpNLE(As<Int4>(src0.y), As<Int4>(src1.y))); + dst.z = As<Float4>(CmpNLE(As<Int4>(src0.z), As<Int4>(src1.z))); + dst.w = As<Float4>(CmpNLE(As<Int4>(src0.w), As<Int4>(src1.w))); + break; + case Shader::CONTROL_EQ: + dst.x = As<Float4>(CmpEQ(As<Int4>(src0.x), As<Int4>(src1.x))); + dst.y 
= As<Float4>(CmpEQ(As<Int4>(src0.y), As<Int4>(src1.y))); + dst.z = As<Float4>(CmpEQ(As<Int4>(src0.z), As<Int4>(src1.z))); + dst.w = As<Float4>(CmpEQ(As<Int4>(src0.w), As<Int4>(src1.w))); + break; + case Shader::CONTROL_GE: + dst.x = As<Float4>(CmpNLT(As<Int4>(src0.x), As<Int4>(src1.x))); + dst.y = As<Float4>(CmpNLT(As<Int4>(src0.y), As<Int4>(src1.y))); + dst.z = As<Float4>(CmpNLT(As<Int4>(src0.z), As<Int4>(src1.z))); + dst.w = As<Float4>(CmpNLT(As<Int4>(src0.w), As<Int4>(src1.w))); + break; + case Shader::CONTROL_LT: + dst.x = As<Float4>(CmpLT(As<Int4>(src0.x), As<Int4>(src1.x))); + dst.y = As<Float4>(CmpLT(As<Int4>(src0.y), As<Int4>(src1.y))); + dst.z = As<Float4>(CmpLT(As<Int4>(src0.z), As<Int4>(src1.z))); + dst.w = As<Float4>(CmpLT(As<Int4>(src0.w), As<Int4>(src1.w))); + break; + case Shader::CONTROL_NE: + dst.x = As<Float4>(CmpNEQ(As<Int4>(src0.x), As<Int4>(src1.x))); + dst.y = As<Float4>(CmpNEQ(As<Int4>(src0.y), As<Int4>(src1.y))); + dst.z = As<Float4>(CmpNEQ(As<Int4>(src0.z), As<Int4>(src1.z))); + dst.w = As<Float4>(CmpNEQ(As<Int4>(src0.w), As<Int4>(src1.w))); + break; + case Shader::CONTROL_LE: + dst.x = As<Float4>(CmpLE(As<Int4>(src0.x), As<Int4>(src1.x))); + dst.y = As<Float4>(CmpLE(As<Int4>(src0.y), As<Int4>(src1.y))); + dst.z = As<Float4>(CmpLE(As<Int4>(src0.z), As<Int4>(src1.z))); + dst.w = As<Float4>(CmpLE(As<Int4>(src0.w), As<Int4>(src1.w))); + break; + default: + ASSERT(false); + } + } + + void ShaderCore::all(Float4 &dst, Vector4f &src) + { + dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w)); + } + + void ShaderCore::any(Float4 &dst, Vector4f &src) + { + dst = As<Float4>(As<Int4>(src.x) | As<Int4>(src.y) | As<Int4>(src.z) | As<Int4>(src.w)); + } + + void ShaderCore::not(Vector4f &dst, Vector4f &src) + { + dst.x = As<Float4>(As<Int4>(src.x) ^ Int4(0xFFFFFFFF)); + dst.y = As<Float4>(As<Int4>(src.y) ^ Int4(0xFFFFFFFF)); + dst.z = As<Float4>(As<Int4>(src.z) ^ Int4(0xFFFFFFFF)); + dst.w = As<Float4>(As<Int4>(src.w) ^ 
Int4(0xFFFFFFFF)); + } + + void ShaderCore::or(Float4 &dst, Float4 &src0, Float4 &src1) + { + dst = As<Float4>(As<Int4>(src0) | As<Int4>(src1)); + } + + void ShaderCore::xor(Float4 &dst, Float4 &src0, Float4 &src1) + { + dst = As<Float4>(As<Int4>(src0) ^ As<Int4>(src1)); + } + + void ShaderCore::and(Float4 &dst, Float4 &src0, Float4 &src1) + { + dst = As<Float4>(As<Int4>(src0) & As<Int4>(src1)); + } }
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp index 662e50a..6cb42f4 100644 --- a/src/Shader/ShaderCore.hpp +++ b/src/Shader/ShaderCore.hpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -12,51 +12,284 @@ #ifndef sw_ShaderCore_hpp #define sw_ShaderCore_hpp +#include "Debug.hpp" #include "Shader.hpp" #include "Reactor/Reactor.hpp" namespace sw { + class Vector4i + { + public: + Vector4i(); + Vector4i(unsigned short x, unsigned short y, unsigned short z, unsigned short w); + Vector4i(const Vector4i &rhs); + + Short4 &operator[](int i); + Vector4i &operator=(const Vector4i &rhs); + + Short4 x; + Short4 y; + Short4 z; + Short4 w; + }; + + class Vector4f + { + public: + Vector4f(); + Vector4f(float x, float y, float z, float w); + Vector4f(const Vector4f &rhs); + + Float4 &operator[](int i); + Vector4f &operator=(const Vector4f &rhs); + + Float4 x; + Float4 y; + Float4 z; + Float4 w; + }; + + Float4 exponential2(RValue<Float4> x, bool pp = false); + Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false); + Float4 exponential(RValue<Float4> x, bool pp = false); + Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false); + Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false); + Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false); + Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); + Float4 modulo(RValue<Float4> x, RValue<Float4> y); + Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range + Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range + Float4 sine(RValue<Float4> x, bool pp = false); + Float4 cosine(RValue<Float4> x, bool pp = false); + Float4 tangent(RValue<Float4> x, bool pp = false); + Float4 arccos(RValue<Float4> x, bool pp = false); + Float4 arcsin(RValue<Float4> x, bool pp = false); + Float4 
arctan(RValue<Float4> x, bool pp = false); + Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false); + + Float4 dot2(Vector4f &v0, Vector4f &v1); + Float4 dot3(Vector4f &v0, Vector4f &v1); + Float4 dot4(Vector4f &v0, Vector4f &v1); + + void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); + void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); + void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); + void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); + void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); + void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); + void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); + void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); + + class Register + { + public: + Register(Reference<Float4> &x, Reference<Float4> &y, Reference<Float4> &z, Reference<Float4> &w) : x(x), y(y), z(z), w(w) + { + } + + Reference<Float4> &operator[](int i) + { + switch(i) + { + default: + case 0: return x; + case 1: return y; + case 2: return z; + case 3: return w; + } + } + + Register &operator=(const Register &rhs) + { + x = rhs.x; + y = rhs.y; + z = rhs.z; + w = rhs.w; + + return *this; + } + + Register &operator=(const Vector4f &rhs) + { + x = rhs.x; + y = rhs.y; + z = rhs.z; + w = rhs.w; + + return *this; + } + + operator Vector4f() + { + Vector4f v; + + v.x = x; + v.y = y; + v.z = z; + v.w = w; + + return v; + } + + Reference<Float4> x; + Reference<Float4> y; + Reference<Float4> z; + Reference<Float4> w; + }; + + template<int S, bool D = false> + class RegisterArray + { + public: + RegisterArray(bool dynamic = D) : dynamic(dynamic) + { + if(dynamic) + { + x = new Array<Float4>(S); + y = new Array<Float4>(S); + z = new Array<Float4>(S); + w = new Array<Float4>(S); + } + else + { + x = new Array<Float4>[S]; + y = new Array<Float4>[S]; + z = new 
Array<Float4>[S]; + w = new Array<Float4>[S]; + } + } + + ~RegisterArray() + { + delete[] x; + delete[] y; + delete[] z; + delete[] w; + } + + Register operator[](int i) + { + if(dynamic) + { + return Register(x[0][i], y[0][i], z[0][i], w[0][i]); + } + else + { + return Register(x[i][0], y[i][0], z[i][0], w[i][0]); + } + } + + Register operator[](RValue<Int> i) + { + ASSERT(dynamic); + + return Register(x[0][i], y[0][i], z[0][i], w[0][i]); + } + + private: + const bool dynamic; + Array<Float4> *x; + Array<Float4> *y; + Array<Float4> *z; + Array<Float4> *w; + }; + class ShaderCore { - typedef Shader::Instruction::Operation::Control Control; - typedef Shader::Instruction::Operation Op; + typedef Shader::Control Control; public: - void mov(Color4f &dst, Color4f &src, bool floorToInteger = false); - void add(Color4f &dst, Color4f &src0, Color4f &src1); - void sub(Color4f &dst, Color4f &src0, Color4f &src1); - void mad(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2); - void mul(Color4f &dst, Color4f &src0, Color4f &src1); - void rcp(Color4f &dst, Color4f &src, bool pp = false); - void rsq(Color4f &dst, Color4f &src, bool pp = false); - void dp3(Color4f &dst, Color4f &src0, Color4f &src1); - void dp4(Color4f &dst, Color4f &src0, Color4f &src1); - void min(Color4f &dst, Color4f &src0, Color4f &src1); - void max(Color4f &dst, Color4f &src0, Color4f &src1); - void slt(Color4f &dst, Color4f &src0, Color4f &src1); - void sge(Color4f &dst, Color4f &src0, Color4f &src1); - void exp(Color4f &dst, Color4f &src, bool pp = false); - void log(Color4f &dst, Color4f &src, bool pp = false); - void lit(Color4f &dst, Color4f &src); - void dst(Color4f &dst, Color4f &src0, Color4f &src1); - void lrp(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2); - void frc(Color4f &dst, Color4f &src); - void pow(Color4f &dst, Color4f &src0, Color4f &src1, bool pp = false); - void crs(Color4f &dst, Color4f &src0, Color4f &src1); - void sgn(Color4f &dst, Color4f &src); - void 
abs(Color4f &dst, Color4f &src); - void nrm(Color4f &dst, Color4f &src, bool pp = false); - void sincos(Color4f &dst, Color4f &src, bool pp = false); - void expp(Color4f &dst, Color4f &src, unsigned short version); - void logp(Color4f &dst, Color4f &src, unsigned short version); - void cmp(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2); - void dp2add(Color4f &dst, Color4f &src0, Color4f &src1, Color4f &src2); - void setp(Color4f &dst, Color4f &src0, Color4f &src1, Control control); + void mov(Vector4f &dst, Vector4f &src, bool floorToInteger = false); + void f2b(Vector4f &dst, Vector4f &src); + void b2f(Vector4f &dst, Vector4f &src); + void add(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void sub(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void mad(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void mul(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void rcpx(Vector4f &dst, Vector4f &src, bool pp = false); + void div(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void mod(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void rsqx(Vector4f &dst, Vector4f &src, bool pp = false); + void sqrt(Vector4f &dst, Vector4f &src, bool pp = false); + void rsq(Vector4f &dst, Vector4f &src, bool pp = false); + void len2(Float4 &dst, Vector4f &src, bool pp = false); + void len3(Float4 &dst, Vector4f &src, bool pp = false); + void len4(Float4 &dst, Vector4f &src, bool pp = false); + void dist1(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void dist2(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void dist3(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void dist4(Float4 &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void dp1(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void dp2(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void dp2add(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void dp3(Vector4f &dst, Vector4f &src0, Vector4f &src1); + 
void dp4(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void min(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void max(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void slt(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void step(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void exp2x(Vector4f &dst, Vector4f &src, bool pp = false); + void exp2(Vector4f &dst, Vector4f &src, bool pp = false); + void exp(Vector4f &dst, Vector4f &src, bool pp = false); + void log2x(Vector4f &dst, Vector4f &src, bool pp = false); + void log2(Vector4f &dst, Vector4f &src, bool pp = false); + void log(Vector4f &dst, Vector4f &src, bool pp = false); + void lit(Vector4f &dst, Vector4f &src); + void att(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void lrp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void smooth(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void frc(Vector4f &dst, Vector4f &src); + void trunc(Vector4f &dst, Vector4f &src); + void floor(Vector4f &dst, Vector4f &src); + void ceil(Vector4f &dst, Vector4f &src); + void powx(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void pow(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void crs(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void forward1(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void forward2(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void forward3(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void forward4(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void reflect1(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void reflect2(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void reflect3(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void reflect4(Vector4f &dst, Vector4f &src0, Vector4f &src1); + void refract1(Vector4f &dst, Vector4f &src0, Vector4f &src1, Float4 &src2); + void refract2(Vector4f &dst, Vector4f &src0, 
Vector4f &src1, Float4 &src2); + void refract3(Vector4f &dst, Vector4f &src0, Vector4f &src1, Float4 &src2); + void refract4(Vector4f &dst, Vector4f &src0, Vector4f &src1, Float4 &src2); + void sgn(Vector4f &dst, Vector4f &src); + void abs(Vector4f &dst, Vector4f &src); + void nrm2(Vector4f &dst, Vector4f &src, bool pp = false); + void nrm3(Vector4f &dst, Vector4f &src, bool pp = false); + void nrm4(Vector4f &dst, Vector4f &src, bool pp = false); + void sincos(Vector4f &dst, Vector4f &src, bool pp = false); + void cos(Vector4f &dst, Vector4f &src, bool pp = false); + void sin(Vector4f &dst, Vector4f &src, bool pp = false); + void tan(Vector4f &dst, Vector4f &src, bool pp = false); + void acos(Vector4f &dst, Vector4f &src, bool pp = false); + void asin(Vector4f &dst, Vector4f &src, bool pp = false); + void atan(Vector4f &dst, Vector4f &src, bool pp = false); + void atan2(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp = false); + void expp(Vector4f &dst, Vector4f &src, unsigned short version); + void logp(Vector4f &dst, Vector4f &src, unsigned short version); + void cmp0(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void cmp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Control control); + void icmp(Vector4f &dst, Vector4f &src0, Vector4f &src1, Control control); + void select(Vector4f &dst, Vector4f &src0, Vector4f &src1, Vector4f &src2); + void extract(Float4 &dst, Vector4f &src0, Float4 &src1); + void insert(Vector4f &dst, Vector4f &src, Float4 &element, Float4 &index); + void all(Float4 &dst, Vector4f &src); + void any(Float4 &dst, Vector4f &src); + void not(Vector4f &dst, Vector4f &src); + void or(Float4 &dst, Float4 &src0, Float4 &src1); + void xor(Float4 &dst, Float4 &src0, Float4 &src1); + void and(Float4 &dst, Float4 &src0, Float4 &src1); private: void sgn(Float4 &dst, Float4 &src); - void cmp(Float4 &dst, Float4 &src0, Float4 &src1, Float4 &src2); + void cmp0(Float4 &dst, Float4 &src0, Float4 &src1, Float4 &src2); + void 
select(Float4 &dst, RValue<Int4> src0, Float4 &src1, Float4 &src2); }; }
diff --git a/src/Shader/VertexPipeline.cpp b/src/Shader/VertexPipeline.cpp index 112b451..1e39ca7 100644 --- a/src/Shader/VertexPipeline.cpp +++ b/src/Shader/VertexPipeline.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -24,7 +24,7 @@ namespace sw { - VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state) + VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0) { } @@ -32,9 +32,9 @@ { } - Color4f VertexPipeline::transformBlend(Registers &r, Color4f &src, Pointer<Byte> &matrix, bool homogeneous) + Vector4f VertexPipeline::transformBlend(Registers &r, Register &src, Pointer<Byte> &matrix, bool homogeneous) { - Color4f dst; + Vector4f dst; if(state.vertexBlendMatrixCount == 0) { @@ -51,14 +51,15 @@ { for(int i = 0; i < 4; i++) { + Float4 B = r.v[BlendIndices].x; UInt indices; switch(i) { - case 0: indices = As<UInt>(Float(r.v[BlendIndices].x.x)); break; - case 1: indices = As<UInt>(Float(r.v[BlendIndices].x.y)); break; - case 2: indices = As<UInt>(Float(r.v[BlendIndices].x.z)); break; - case 3: indices = As<UInt>(Float(r.v[BlendIndices].x.w)); break; + case 0: indices = As<UInt>(Float(B.x)); break; + case 1: indices = As<UInt>(Float(B.y)); break; + case 2: indices = As<UInt>(Float(B.z)); break; + case 3: indices = As<UInt>(Float(B.w)); break; } index0[i] = (indices & UInt(0x000000FF)) << UInt(6); // FIXME: (indices & 0x000000FF) << 6 @@ -100,13 +101,13 @@ { weight1 = Float4(1.0f) - weight0; - Color4f pos0; - Color4f pos1; + Vector4f pos0; + Vector4f pos1; pos0 = transform(src, matrix, index0, homogeneous); pos1 = transform(src, matrix, index1, homogeneous); - dst.x = pos0.x * weight0 + pos1.x * weight1; // FIXME: Color4f operators + dst.x = pos0.x * weight0 + pos1.x * weight1; // FIXME: Vector4f operators dst.y = pos0.y * weight0 + pos1.y * weight1; dst.z = pos0.z * weight0 + pos1.z * weight1; 
dst.w = pos0.w * weight0 + pos1.w * weight1; @@ -115,9 +116,9 @@ { weight2 = Float4(1.0f) - (weight0 + weight1); - Color4f pos0; - Color4f pos1; - Color4f pos2; + Vector4f pos0; + Vector4f pos1; + Vector4f pos2; pos0 = transform(src, matrix, index0, homogeneous); pos1 = transform(src, matrix, index1, homogeneous); @@ -132,10 +133,10 @@ { weight3 = Float4(1.0f) - (weight0 + weight1 + weight2); - Color4f pos0; - Color4f pos1; - Color4f pos2; - Color4f pos3; + Vector4f pos0; + Vector4f pos1; + Vector4f pos2; + Vector4f pos3; pos0 = transform(src, matrix, index0, homogeneous); pos1 = transform(src, matrix, index1, homogeneous); @@ -154,8 +155,8 @@ void VertexPipeline::pipeline(Registers &r) { - Color4f position; - Color4f normal; + Vector4f position; + Vector4f normal; if(!state.preTransformed) { @@ -166,10 +167,10 @@ position = r.v[PositionT]; } - r.ox[Pos] = position.x; - r.oy[Pos] = position.y; - r.oz[Pos] = position.z; - r.ow[Pos] = position.w; + r.o[Pos].x = position.x; + r.o[Pos].y = position.y; + r.o[Pos].z = position.z; + r.o[Pos].w = position.w; if(state.vertexNormalActive) { @@ -186,59 +187,59 @@ // FIXME: Don't process if not used at all if(state.diffuseActive && state.input[Color0]) { - Color4f diffuse = r.v[Color0]; + Vector4f diffuse = r.v[Color0]; - r.ox[D0] = diffuse.x; - r.oy[D0] = diffuse.y; - r.oz[D0] = diffuse.z; - r.ow[D0] = diffuse.w; + r.o[D0].x = diffuse.x; + r.o[D0].y = diffuse.y; + r.o[D0].z = diffuse.z; + r.o[D0].w = diffuse.w; } else { - r.ox[D0] = Float4(1.0f, 1.0f, 1.0f, 1.0f); - r.oy[D0] = Float4(1.0f, 1.0f, 1.0f, 1.0f); - r.oz[D0] = Float4(1.0f, 1.0f, 1.0f, 1.0f); - r.ow[D0] = Float4(1.0f, 1.0f, 1.0f, 1.0f); + r.o[D0].x = Float4(1.0f); + r.o[D0].y = Float4(1.0f); + r.o[D0].z = Float4(1.0f); + r.o[D0].w = Float4(1.0f); } // FIXME: Don't process if not used at all if(state.specularActive && state.input[Color1]) { - Color4f specular = r.v[Color1]; + Vector4f specular = r.v[Color1]; - r.ox[D1] = specular.x; - r.oy[D1] = specular.y; - 
r.oz[D1] = specular.z; - r.ow[D1] = specular.w; + r.o[D1].x = specular.x; + r.o[D1].y = specular.y; + r.o[D1].z = specular.z; + r.o[D1].w = specular.w; } else { - r.ox[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.oy[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.oz[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.ow[D1] = Float4(1.0f, 1.0f, 1.0f, 1.0f); + r.o[D1].x = Float4(0.0f); + r.o[D1].y = Float4(0.0f); + r.o[D1].z = Float4(0.0f); + r.o[D1].w = Float4(1.0f); } } else { - Color4f diffuseSum; + Vector4f diffuseSum; - r.ox[D0] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.oy[D0] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.oz[D0] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.ow[D0] = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.o[D0].x = Float4(0.0f); + r.o[D0].y = Float4(0.0f); + r.o[D0].z = Float4(0.0f); + r.o[D0].w = Float4(0.0f); - r.ox[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.oy[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.oz[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.ow[D1] = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.o[D1].x = Float4(0.0f); + r.o[D1].y = Float4(0.0f); + r.o[D1].z = Float4(0.0f); + r.o[D1].w = Float4(0.0f); - diffuseSum.x = Float4(0.0f, 0.0f, 0.0f, 0.0f); - diffuseSum.y = Float4(0.0f, 0.0f, 0.0f, 0.0f); - diffuseSum.z = Float4(0.0f, 0.0f, 0.0f, 0.0f); - diffuseSum.w = Float4(0.0f, 0.0f, 0.0f, 0.0f); + diffuseSum.x = Float4(0.0f); + diffuseSum.y = Float4(0.0f); + diffuseSum.z = Float4(0.0f); + diffuseSum.w = Float4(0.0f); - Color4f vertexPosition = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); + Vector4f vertexPosition = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); for(int i = 0; i < 8; i++) { @@ -247,7 +248,7 @@ continue; } - Color4f L; // Light vector + Vector4f L; // Light vector Float4 att; // Attenuation // Attenuation @@ -280,9 +281,9 @@ { Float4 lightAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack - r.ox[D0] = r.ox[D0] + lightAmbient.x * 
att; - r.oy[D0] = r.oy[D0] + lightAmbient.y * att; - r.oz[D0] = r.oz[D0] + lightAmbient.z * att; + r.o[D0].x = r.o[D0].x + lightAmbient.x * att; + r.o[D0].y = r.o[D0].y + lightAmbient.y * att; + r.o[D0].z = r.o[D0].z + lightAmbient.z * att; } // Diffuse @@ -291,10 +292,10 @@ Float4 dot; dot = dot3(L, normal); - dot = Max(dot, Float4(0.0f, 0.0f, 0.0f, 0.0f)); + dot = Max(dot, Float4(0.0f)); dot *= att; - Color4f diff; + Vector4f diff; if(state.vertexDiffuseMaterialSourceActive == Context::MATERIAL) { @@ -323,15 +324,15 @@ // Specular if(state.vertexSpecularActive) { - Color4f S; - Color4f C; // Camera vector + Vector4f S; + Vector4f C; // Camera vector Float4 pow; pow = *Pointer<Float>(r.data + OFFSET(DrawData,ff.materialShininess)); - S.x = Float4(0.0f, 0.0f, 0.0f, 0.0f) - vertexPosition.x; - S.y = Float4(0.0f, 0.0f, 0.0f, 0.0f) - vertexPosition.y; - S.z = Float4(0.0f, 0.0f, 0.0f, 0.0f) - vertexPosition.z; + S.x = Float4(0.0f) - vertexPosition.x; + S.y = Float4(0.0f) - vertexPosition.y; + S.z = Float4(0.0f) - vertexPosition.z; C = normalize(S); S.x = L.x + C.x; @@ -344,7 +345,7 @@ Float4 P = power(dot, pow); P *= att; - Color4f spec; + Vector4f spec; if(state.vertexSpecularMaterialSourceActive == Context::MATERIAL) { @@ -374,93 +375,93 @@ spec.y *= P; spec.z *= P; - spec.x = Max(spec.x, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - spec.y = Max(spec.y, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - spec.z = Max(spec.z, Float4(0.0f, 0.0f, 0.0f, 0.0f)); + spec.x = Max(spec.x, Float4(0.0f)); + spec.y = Max(spec.y, Float4(0.0f)); + spec.z = Max(spec.z, Float4(0.0f)); - r.ox[D1] = r.ox[D1] + spec.x; - r.oy[D1] = r.oy[D1] + spec.y; - r.oz[D1] = r.oz[D1] + spec.z; + r.o[D1].x = r.o[D1].x + spec.x; + r.o[D1].y = r.o[D1].y + spec.y; + r.o[D1].z = r.o[D1].z + spec.z; } } Float4 globalAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack - r.ox[D0] = r.ox[D0] + globalAmbient.x; - r.oy[D0] = r.oy[D0] + globalAmbient.y; - r.oz[D0] = r.oz[D0] + globalAmbient.z; + 
r.o[D0].x = r.o[D0].x + globalAmbient.x; + r.o[D0].y = r.o[D0].y + globalAmbient.y; + r.o[D0].z = r.o[D0].z + globalAmbient.z; if(state.vertexAmbientMaterialSourceActive == Context::MATERIAL) { Float4 materialAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack - r.ox[D0] = r.ox[D0] * materialAmbient.x; - r.oy[D0] = r.oy[D0] * materialAmbient.y; - r.oz[D0] = r.oz[D0] * materialAmbient.z; + r.o[D0].x = r.o[D0].x * materialAmbient.x; + r.o[D0].y = r.o[D0].y * materialAmbient.y; + r.o[D0].z = r.o[D0].z * materialAmbient.z; } else if(state.vertexAmbientMaterialSourceActive == Context::COLOR1) { - Color4f materialDiffuse = r.v[Color0]; + Vector4f materialDiffuse = r.v[Color0]; - r.ox[D0] = r.ox[D0] * materialDiffuse.x; - r.oy[D0] = r.oy[D0] * materialDiffuse.y; - r.oz[D0] = r.oz[D0] * materialDiffuse.z; + r.o[D0].x = r.o[D0].x * materialDiffuse.x; + r.o[D0].y = r.o[D0].y * materialDiffuse.y; + r.o[D0].z = r.o[D0].z * materialDiffuse.z; } else if(state.vertexAmbientMaterialSourceActive == Context::COLOR2) { - Color4f materialSpecular = r.v[Color1]; + Vector4f materialSpecular = r.v[Color1]; - r.ox[D0] = r.ox[D0] * materialSpecular.x; - r.oy[D0] = r.oy[D0] * materialSpecular.y; - r.oz[D0] = r.oz[D0] * materialSpecular.z; + r.o[D0].x = r.o[D0].x * materialSpecular.x; + r.o[D0].y = r.o[D0].y * materialSpecular.y; + r.o[D0].z = r.o[D0].z * materialSpecular.z; } else ASSERT(false); - r.ox[D0] = r.ox[D0] + diffuseSum.x; - r.oy[D0] = r.oy[D0] + diffuseSum.y; - r.oz[D0] = r.oz[D0] + diffuseSum.z; + r.o[D0].x = r.o[D0].x + diffuseSum.x; + r.o[D0].y = r.o[D0].y + diffuseSum.y; + r.o[D0].z = r.o[D0].z + diffuseSum.z; // Emissive if(state.vertexEmissiveMaterialSourceActive == Context::MATERIAL) { Float4 materialEmission = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack - r.ox[D0] = r.ox[D0] + materialEmission.x; - r.oy[D0] = r.oy[D0] + materialEmission.y; - r.oz[D0] = r.oz[D0] + materialEmission.z; + r.o[D0].x = 
r.o[D0].x + materialEmission.x; + r.o[D0].y = r.o[D0].y + materialEmission.y; + r.o[D0].z = r.o[D0].z + materialEmission.z; } else if(state.vertexEmissiveMaterialSourceActive == Context::COLOR1) { - Color4f materialSpecular = r.v[Color0]; + Vector4f materialSpecular = r.v[Color0]; - r.ox[D0] = r.ox[D0] + materialSpecular.x; - r.oy[D0] = r.oy[D0] + materialSpecular.y; - r.oz[D0] = r.oz[D0] + materialSpecular.z; + r.o[D0].x = r.o[D0].x + materialSpecular.x; + r.o[D0].y = r.o[D0].y + materialSpecular.y; + r.o[D0].z = r.o[D0].z + materialSpecular.z; } else if(state.vertexEmissiveMaterialSourceActive == Context::COLOR2) { - Color4f materialSpecular = r.v[Color1]; + Vector4f materialSpecular = r.v[Color1]; - r.ox[D0] = r.ox[D0] + materialSpecular.x; - r.oy[D0] = r.oy[D0] + materialSpecular.y; - r.oz[D0] = r.oz[D0] + materialSpecular.z; + r.o[D0].x = r.o[D0].x + materialSpecular.x; + r.o[D0].y = r.o[D0].y + materialSpecular.y; + r.o[D0].z = r.o[D0].z + materialSpecular.z; } else ASSERT(false); // Diffuse alpha component if(state.vertexDiffuseMaterialSourceActive == Context::MATERIAL) { - r.ow[D0] = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack + r.o[D0].w = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack } else if(state.vertexDiffuseMaterialSourceActive == Context::COLOR1) { - Color4f alpha = r.v[Color0]; - r.ow[D0] = alpha.w; + Vector4f alpha = r.v[Color0]; + r.o[D0].w = alpha.w; } else if(state.vertexDiffuseMaterialSourceActive == Context::COLOR2) { - Color4f alpha = r.v[Color1]; - r.ow[D0] = alpha.w; + Vector4f alpha = r.v[Color1]; + r.o[D0].w = alpha.w; } else ASSERT(false); @@ -469,17 +470,17 @@ // Specular alpha component if(state.vertexSpecularMaterialSourceActive == Context::MATERIAL) { - r.ow[D1] = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack + r.o[D1].w = Float4(*Pointer<Float4>(r.data + 
OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack } else if(state.vertexSpecularMaterialSourceActive == Context::COLOR1) { - Color4f alpha = r.v[Color0]; - r.ow[D1] = alpha.w; + Vector4f alpha = r.v[Color0]; + r.o[D1].w = alpha.w; } else if(state.vertexSpecularMaterialSourceActive == Context::COLOR2) { - Color4f alpha = r.v[Color1]; - r.ow[D1] = alpha.w; + Vector4f alpha = r.v[Color1]; + r.o[D1].w = alpha.w; } else ASSERT(false); } @@ -492,11 +493,11 @@ case Context::FOG_NONE: if(state.specularActive) { - r.ox[Fog] = r.ow[D1]; + r.o[Fog].x = r.o[D1].w; } else { - r.ox[Fog] = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.o[Fog].x = Float4(0.0f); } break; case Context::FOG_LINEAR: @@ -504,21 +505,21 @@ case Context::FOG_EXP2: if(!state.rangeFogActive) { - r.ox[Fog] = r.oz[Pos]; + r.o[Fog].x = r.o[Pos].z; } else { - Color4f pos; + Vector4f pos; - pos.x = r.ox[Pos]; - pos.y = r.oy[Pos]; - pos.z = r.oz[Pos]; - pos.w = r.ow[Pos]; + pos.x = r.o[Pos].x; + pos.y = r.o[Pos].y; + pos.z = r.o[Pos].z; + pos.w = r.o[Pos].w; - r.ox[Fog] = Sqrt(dot3(pos, pos)); // FIXME: oFog = length(o[Pos]); + r.o[Fog].x = Sqrt(dot3(pos, pos)); // FIXME: oFog = length(o[Pos]); } - r.ox[Fog] = r.ox[Fog] * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset)); + r.o[Fog].x = r.o[Fog].x * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset)); break; default: ASSERT(false); @@ -533,7 +534,7 @@ processPointSize(r); } - void VertexPipeline::processTextureCoordinate(Registers &r, int stage, Color4f &normal, Color4f &position) + void VertexPipeline::processTextureCoordinate(Registers &r, int stage, Vector4f &normal, Vector4f &position) { if(state.output[T0 + stage].write) { @@ -543,28 +544,28 @@ { case Context::TEXGEN_PASSTHRU: { - Color4f v = r.v[TexCoord0 + i]; + Vector4f v = r.v[TexCoord0 + i]; - r.ox[T0 + stage] = v.x; - r.oy[T0 + stage] = v.y; - r.oz[T0 + stage] = v.z; - r.ow[T0 + 
stage] = v.w; + r.o[T0 + stage].x = v.x; + r.o[T0 + stage].y = v.y; + r.o[T0 + stage].z = v.z; + r.o[T0 + stage].w = v.w; - if(state.input[TexCoord0 + i].type == STREAMTYPE_FLOAT) + if(state.input[TexCoord0 + i]) { switch(state.input[TexCoord0 + i].count) { case 1: - r.oy[T0 + stage] = Float4(1.0f, 1.0f, 1.0f, 1.0f); - r.oz[T0 + stage] = Float4(0.0f, 0.0f, 0.0f, 0.0f); - r.ow[T0 + stage] = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.o[T0 + stage].y = Float4(1.0f); + r.o[T0 + stage].z = Float4(0.0f); + r.o[T0 + stage].w = Float4(0.0f); break; case 2: - r.oz[T0 + stage] = Float4(1.0f, 1.0f, 1.0f, 1.0f); - r.ow[T0 + stage] = Float4(0.0f, 0.0f, 0.0f, 0.0f); + r.o[T0 + stage].z = Float4(1.0f); + r.o[T0 + stage].w = Float4(0.0f); break; case 3: - r.ow[T0 + stage] = Float4(1.0f, 1.0f, 1.0f, 1.0f); + r.o[T0 + stage].w = Float4(1.0f); break; case 4: break; @@ -572,12 +573,11 @@ ASSERT(false); } } - else ASSERT(!state.input[TexCoord0 + i]); // Point sprite; coordinates provided by setup } break; case Context::TEXGEN_NORMAL: { - Color4f Nc; // Normal vector in camera space + Vector4f Nc; // Normal vector in camera space if(state.vertexNormalActive) { @@ -585,51 +585,51 @@ } else { - Nc.x = Float4(0.0f, 0.0f, 0.0f, 0.0f); - Nc.y = Float4(0.0f, 0.0f, 0.0f, 0.0f); - Nc.z = Float4(0.0f, 0.0f, 0.0f, 0.0f); + Nc.x = Float4(0.0f); + Nc.y = Float4(0.0f); + Nc.z = Float4(0.0f); } - Nc.w = Float4(1.0f, 1.0f, 1.0f, 1.0f); + Nc.w = Float4(1.0f); - r.ox[T0 + stage] = Nc.x; - r.oy[T0 + stage] = Nc.y; - r.oz[T0 + stage] = Nc.z; - r.ow[T0 + stage] = Nc.w; + r.o[T0 + stage].x = Nc.x; + r.o[T0 + stage].y = Nc.y; + r.o[T0 + stage].z = Nc.z; + r.o[T0 + stage].w = Nc.w; } break; case Context::TEXGEN_POSITION: { - Color4f Pn = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space + Vector4f Pn = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space - Pn.w 
= Float4(1.0f, 1.0f, 1.0f, 1.0f); + Pn.w = Float4(1.0f); - r.ox[T0 + stage] = Pn.x; - r.oy[T0 + stage] = Pn.y; - r.oz[T0 + stage] = Pn.z; - r.ow[T0 + stage] = Pn.w; + r.o[T0 + stage].x = Pn.x; + r.o[T0 + stage].y = Pn.y; + r.o[T0 + stage].z = Pn.z; + r.o[T0 + stage].w = Pn.w; } break; case Context::TEXGEN_REFLECTION: { - Color4f R; // Reflection vector + Vector4f R; // Reflection vector if(state.vertexNormalActive) { - Color4f Nc; // Normal vector in camera space + Vector4f Nc; // Normal vector in camera space Nc = normal; if(state.localViewerActive) { - Color4f Ec; // Eye vector in camera space - Color4f N2; + Vector4f Ec; // Eye vector in camera space + Vector4f N2; Ec = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); Ec = normalize(Ec); // R = E - 2 * N * (E . N) - Float4 dot = Float4(2.0f, 2.0f, 2.0f, 2.0f) * dot3(Ec, Nc); + Float4 dot = Float4(2.0f) * dot3(Ec, Nc); R.x = Ec.x - Nc.x * dot; R.y = Ec.y - Nc.y * dot; @@ -641,46 +641,46 @@ // v = -2 * Nz * Ny // w = 1 - 2 * Nz * Nz - R.x = -Float4(2.0f, 2.0f, 2.0f, 2.0f) * Nc.z * Nc.x; - R.y = -Float4(2.0f, 2.0f, 2.0f, 2.0f) * Nc.z * Nc.y; - R.z = Float4(1.0f, 1.0f, 1.0f, 1.0f) - Float4(2.0f, 2.0f, 2.0f, 2.0f) * Nc.z * Nc.z; + R.x = -Float4(2.0f) * Nc.z * Nc.x; + R.y = -Float4(2.0f) * Nc.z * Nc.y; + R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; } } else { - R.x = Float4(0.0f, 0.0f, 0.0f, 0.0f); - R.y = Float4(0.0f, 0.0f, 0.0f, 0.0f); - R.z = Float4(0.0f, 0.0f, 0.0f, 0.0f); + R.x = Float4(0.0f); + R.y = Float4(0.0f); + R.z = Float4(0.0f); } - R.w = Float4(1.0f, 1.0f, 1.0f, 1.0f); + R.w = Float4(1.0f); - r.ox[T0 + stage] = R.x; - r.oy[T0 + stage] = R.y; - r.oz[T0 + stage] = R.z; - r.ow[T0 + stage] = R.w; + r.o[T0 + stage].x = R.x; + r.o[T0 + stage].y = R.y; + r.o[T0 + stage].z = R.z; + r.o[T0 + stage].w = R.w; } break; case Context::TEXGEN_SPHEREMAP: { - Color4f R; // Reflection vector + Vector4f R; // Reflection vector if(state.vertexNormalActive) { - Color4f 
Nc; // Normal vector in camera space + Vector4f Nc; // Normal vector in camera space Nc = normal; if(state.localViewerActive) { - Color4f Ec; // Eye vector in camera space - Color4f N2; + Vector4f Ec; // Eye vector in camera space + Vector4f N2; Ec = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); Ec = normalize(Ec); // R = E - 2 * N * (E . N) - Float4 dot = Float4(2.0f, 2.0f, 2.0f, 2.0f) * dot3(Ec, Nc); + Float4 dot = Float4(2.0f) * dot3(Ec, Nc); R.x = Ec.x - Nc.x * dot; R.y = Ec.y - Nc.y * dot; @@ -692,48 +692,48 @@ // v = -2 * Nz * Ny // w = 1 - 2 * Nz * Nz - R.x = -Float4(2.0f, 2.0f, 2.0f, 2.0f) * Nc.z * Nc.x; - R.y = -Float4(2.0f, 2.0f, 2.0f, 2.0f) * Nc.z * Nc.y; - R.z = Float4(1.0f, 1.0f, 1.0f, 1.0f) - Float4(2.0f, 2.0f, 2.0f, 2.0f) * Nc.z * Nc.z; + R.x = -Float4(2.0f) * Nc.z * Nc.x; + R.y = -Float4(2.0f) * Nc.z * Nc.y; + R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; } } else { - R.x = Float4(0.0f, 0.0f, 0.0f, 0.0f); - R.y = Float4(0.0f, 0.0f, 0.0f, 0.0f); - R.z = Float4(0.0f, 0.0f, 0.0f, 0.0f); + R.x = Float4(0.0f); + R.y = Float4(0.0f); + R.z = Float4(0.0f); } - R.z -= Float4(1.0f, 1.0f, 1.0f, 1.0f); + R.z -= Float4(1.0f); R = normalize(R); - R.x = Float4(0.5f, 0.5f, 0.5f, 0.5f) * R.x + Float4(0.5f, 0.5f, 0.5f, 0.5f); - R.y = Float4(0.5f, 0.5f, 0.5f, 0.5f) * R.y + Float4(0.5f, 0.5f, 0.5f, 0.5f); + R.x = Float4(0.5f) * R.x + Float4(0.5f); + R.y = Float4(0.5f) * R.y + Float4(0.5f); - R.z = Float4(1.0f, 1.0f, 1.0f, 1.0f); - R.w = Float4(0.0f, 0.0f, 0.0f, 0.0f); + R.z = Float4(1.0f); + R.w = Float4(0.0f); - r.ox[T0 + stage] = R.x; - r.oy[T0 + stage] = R.y; - r.oz[T0 + stage] = R.z; - r.ow[T0 + stage] = R.w; + r.o[T0 + stage].x = R.x; + r.o[T0 + stage].y = R.y; + r.o[T0 + stage].z = R.z; + r.o[T0 + stage].w = R.w; } break; default: ASSERT(false); } - Color4f texTrans0; - Color4f texTrans1; - Color4f texTrans2; - Color4f texTrans3; + Vector4f texTrans0; + Vector4f texTrans1; + Vector4f texTrans2; + Vector4f 
texTrans3; - Color4f T; - Color4f t; + Vector4f T; + Vector4f t; - T.x = r.ox[T0 + stage]; - T.y = r.oy[T0 + stage]; - T.z = r.oz[T0 + stage]; - T.w = r.ow[T0 + stage]; + T.x = r.o[T0 + stage].x; + T.y = r.o[T0 + stage].y; + T.z = r.o[T0 + stage].z; + T.w = r.o[T0 + stage].w; switch(state.textureState[stage].textureTransformCountActive) { @@ -766,10 +766,10 @@ texTrans0.w = texTrans0.w.wwww; t.x = dot4(T, texTrans0); - r.ox[T0 + stage] = t.x; - r.oy[T0 + stage] = t.y; - r.oz[T0 + stage] = t.z; - r.ow[T0 + stage] = t.w; + r.o[T0 + stage].x = t.x; + r.o[T0 + stage].y = t.y; + r.o[T0 + stage].z = t.z; + r.o[T0 + stage].w = t.w; case 0: break; default: @@ -787,16 +787,16 @@ if(state.input[PSize]) { - r.oy[Pts] = r.v[PSize].x; + r.o[Pts].y = r.v[PSize].x; } else { - r.oy[Pts] = *Pointer<Float4>(r.data + OFFSET(DrawData,point.pointSize)); + r.o[Pts].y = *Pointer<Float4>(r.data + OFFSET(DrawData,point.pointSize)); } if(state.pointScaleActive && !state.preTransformed) { - Color4f p = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); + Vector4f p = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p); @@ -806,18 +806,13 @@ A = RcpSqrt_pp(A + d * (B + d * C)); - r.oy[Pts] = r.oy[Pts] * Float4(*Pointer<Float>(r.data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack + r.o[Pts].y = r.o[Pts].y * Float4(*Pointer<Float>(r.data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack } } - Color4f VertexPipeline::transform(Color4f &src, Pointer<Byte> &matrix, bool homogeneous) + Vector4f VertexPipeline::transform(Register &src, Pointer<Byte> &matrix, bool homogeneous) { - Color4f dst; - - Color4f row0; - Color4f row1; - Color4f row2; - Color4f row3; + Vector4f dst; if(homogeneous) { @@ -862,14 +857,9 @@ return dst; } - Color4f VertexPipeline::transform(Color4f &src, Pointer<Byte> &matrix, UInt index[4], bool homogeneous) + 
Vector4f VertexPipeline::transform(Register &src, Pointer<Byte> &matrix, UInt index[4], bool homogeneous) { - Color4f dst; - - Color4f row0; - Color4f row1; - Color4f row2; - Color4f row3; + Vector4f dst; if(homogeneous) { @@ -914,9 +904,9 @@ return dst; } - Color4f VertexPipeline::normalize(Color4f &src) + Vector4f VertexPipeline::normalize(Vector4f &src) { - Color4f dst; + Vector4f dst; Float4 rcpLength = RcpSqrt_pp(dot3(src, src)); @@ -933,11 +923,11 @@ dst = dst * dst; dst = dst * dst; - dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f, 1.0f, 1.0f, 1.0f))); + dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f))); dst *= src1; - dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f, 1.0f, 1.0f, 1.0f))); + dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f))); dst = RcpSqrt_pp(dst); dst = RcpSqrt_pp(dst);
diff --git a/src/Shader/VertexPipeline.hpp b/src/Shader/VertexPipeline.hpp index efb8592..bfc3093 100644 --- a/src/Shader/VertexPipeline.hpp +++ b/src/Shader/VertexPipeline.hpp
@@ -1,42 +1,42 @@ -// SwiftShader Software Renderer -// -// Copyright(c) 2005-2011 TransGaming Inc. -// -// All rights reserved. No part of this software may be copied, distributed, transmitted, -// transcribed, stored in a retrieval system, translated into any human or computer -// language by any means, or disclosed to third parties without the explicit written -// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express -// or implied, including but not limited to any patent rights, are granted to you. -// - -#ifndef sw_VertexPipeline_hpp -#define sw_VertexPipeline_hpp - -#include "VertexRoutine.hpp" - -#include "Context.hpp" -#include "VertexProcessor.hpp" - -namespace sw -{ - class VertexPipeline : public VertexRoutine - { - public: - VertexPipeline(const VertexProcessor::State &state); - - virtual ~VertexPipeline(); - - private: - void pipeline(Registers &r); - void processTextureCoordinate(Registers &r, int stage, Color4f &normal, Color4f &position); - void processPointSize(Registers &r); - - Color4f transformBlend(Registers &r, Color4f &src, Pointer<Byte> &matrix, bool homogenous); - Color4f transform(Color4f &src, Pointer<Byte> &matrix, bool homogenous); - Color4f transform(Color4f &src, Pointer<Byte> &matrix, UInt index[4], bool homogenous); - Color4f normalize(Color4f &src); - Float4 power(Float4 &src0, Float4 &src1); - }; -}; - -#endif // sw_VertexPipeline_hpp +// SwiftShader Software Renderer +// +// Copyright(c) 2005-2012 TransGaming Inc. +// +// All rights reserved. No part of this software may be copied, distributed, transmitted, +// transcribed, stored in a retrieval system, translated into any human or computer +// language by any means, or disclosed to third parties without the explicit written +// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express +// or implied, including but not limited to any patent rights, are granted to you. 
+// + +#ifndef sw_VertexPipeline_hpp +#define sw_VertexPipeline_hpp + +#include "VertexRoutine.hpp" + +#include "Context.hpp" +#include "VertexProcessor.hpp" + +namespace sw +{ + class VertexPipeline : public VertexRoutine + { + public: + VertexPipeline(const VertexProcessor::State &state); + + virtual ~VertexPipeline(); + + private: + void pipeline(Registers &r); + void processTextureCoordinate(Registers &r, int stage, Vector4f &normal, Vector4f &position); + void processPointSize(Registers &r); + + Vector4f transformBlend(Registers &r, Register &src, Pointer<Byte> &matrix, bool homogenous); + Vector4f transform(Register &src, Pointer<Byte> &matrix, bool homogenous); + Vector4f transform(Register &src, Pointer<Byte> &matrix, UInt index[4], bool homogenous); + Vector4f normalize(Vector4f &src); + Float4 power(Float4 &src0, Float4 &src1); + }; +}; + +#endif // sw_VertexPipeline_hpp
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp index 41153bf..a292a8e 100644 --- a/src/Shader/VertexProgram.cpp +++ b/src/Shader/VertexProgram.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -22,12 +22,13 @@ namespace sw { - VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *vertexShader) : VertexRoutine(state), vertexShader(vertexShader) + VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader) { - returns = false; ifDepth = 0; loopRepDepth = 0; breakDepth = 0; + currentLabel = -1; + whileTest = false; for(int i = 0; i < 2048; i++) { @@ -52,7 +53,7 @@ if(!state.preTransformed) { - shader(r); + program(r); } else { @@ -60,296 +61,320 @@ } } - Color4f VertexProgram::readConstant(Registers &r, const Src &src, int offset) + void VertexProgram::program(Registers &r) { - Color4f c; + // shader->print("VertexShader-%0.8X.txt", state.shaderID); - int i = src.index + offset; - bool relative = src.relative; - - if(!relative) - { - c.r = c.g = c.b = c.a = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i])); - - c.r = c.r.xxxx; - c.g = c.g.yyyy; - c.b = c.b.zzzz; - c.a = c.a.wwww; - - if(localShaderConstants) // Constant may be known at compile time - { - for(int j = 0; j < vertexShader->getLength(); j++) - { - const ShaderInstruction &instruction = *vertexShader->getInstruction(j); - - if(instruction.getOpcode() == ShaderOperation::OPCODE_DEF) - { - if(instruction.getDestinationParameter().index == i) - { - c.r = Float4(instruction.getSourceParameter(0).value); - c.g = Float4(instruction.getSourceParameter(1).value); - c.b = Float4(instruction.getSourceParameter(2).value); - c.a = Float4(instruction.getSourceParameter(3).value); - - break; - } - } - } - } - } - else if(src.relativeType == Src::PARAMETER_LOOP) - { - Int loopCounter = 
r.aL[r.loopDepth]; - - c.r = c.g = c.b = c.a = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16); - - c.r = c.r.xxxx; - c.g = c.g.yyyy; - c.b = c.b.zzzz; - c.a = c.a.wwww; - } - else - { - Int index0; - Int index1; - Int index2; - Int index3; - - Float4 a0_; - - switch(src.relativeSwizzle & 0x03) - { - case 0: a0_ = r.a0.x; break; - case 1: a0_ = r.a0.y; break; - case 2: a0_ = r.a0.z; break; - case 3: a0_ = r.a0.w; break; - } - - index0 = i + RoundInt(Float(a0_.x)); - index1 = i + RoundInt(Float(a0_.y)); - index2 = i + RoundInt(Float(a0_.z)); - index3 = i + RoundInt(Float(a0_.w)); - - // Clamp to constant register range, c[256] = {0, 0, 0, 0} - index0 = IfThenElse(UInt(index0) > UInt(256), Int(256), index0); - index1 = IfThenElse(UInt(index1) > UInt(256), Int(256), index1); - index2 = IfThenElse(UInt(index2) > UInt(256), Int(256), index2); - index3 = IfThenElse(UInt(index3) > UInt(256), Int(256), index3); - - c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16); - c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16); - c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16); - c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16); - - transpose4x4(c.x, c.y, c.z, c.w); - } - - return c; - } - - void VertexProgram::shader(Registers &r) - { - // vertexShader->print("VertexShader-%0.16llX.txt", state.shaderHash); - - unsigned short version = vertexShader->getVersion(); + unsigned short version = shader->getVersion(); r.enableIndex = 0; r.stackIndex = 0; - - for(int i = 0; i < vertexShader->getLength(); i++) + + // Create all call site return blocks up front + for(int i = 0; i < shader->getLength(); i++) { - const ShaderInstruction *instruction = vertexShader->getInstruction(i); - Op::Opcode opcode = instruction->getOpcode(); + const Shader::Instruction *instruction = shader->getInstruction(i); + Shader::Opcode opcode = instruction->opcode; - // #ifndef NDEBUG // FIXME: 
Centralize debug output control - // vertexShader->printInstruction(i, "debug.txt"); - // #endif + if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) + { + const Dst &dst = instruction->dst; - if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB) + ASSERT(callRetBlock[dst.label].size() == dst.callSite); + callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); + } + } + + for(int i = 0; i < shader->getLength(); i++) + { + const Shader::Instruction *instruction = shader->getInstruction(i); + Shader::Opcode opcode = instruction->opcode; + + if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) { continue; } - Dst dest = instruction->getDestinationParameter(); - Src src0 = instruction->getSourceParameter(0); - Src src1 = instruction->getSourceParameter(1); - Src src2 = instruction->getSourceParameter(2); - Src src3 = instruction->getSourceParameter(3); + Dst dst = instruction->dst; + Src src0 = instruction->src[0]; + Src src1 = instruction->src[1]; + Src src2 = instruction->src[2]; - bool predicate = instruction->isPredicate(); - int size = vertexShader->size(opcode); - Usage usage = instruction->getUsage(); - unsigned char usageIndex = instruction->getUsageIndex(); - Control control = instruction->getControl(); - bool integer = dest.type == Dst::PARAMETER_ADDR; - bool pp = dest.partialPrecision; + bool predicate = instruction->predicate; + int size = shader->size(opcode); + Usage usage = instruction->usage; + unsigned char usageIndex = instruction->usageIndex; + Control control = instruction->control; + bool integer = dst.type == Shader::PARAMETER_ADDR; + bool pp = dst.partialPrecision; - Color4f d; - Color4f s0; - Color4f s1; - Color4f s2; - Color4f s3; + Vector4f d; + Vector4f s0; + Vector4f s1; + Vector4f s2; - if(src0.type != Src::PARAMETER_VOID) s0 = reg(r, src0); - if(src1.type != 
Src::PARAMETER_VOID) s1 = reg(r, src1); - if(src2.type != Src::PARAMETER_VOID) s2 = reg(r, src2); - if(src3.type != Src::PARAMETER_VOID) s3 = reg(r, src3); + if(src0.type != Shader::PARAMETER_VOID) s0 = reg(r, src0); + if(src1.type != Shader::PARAMETER_VOID) s1 = reg(r, src1); + if(src2.type != Shader::PARAMETER_VOID) s2 = reg(r, src2); switch(opcode) { - case Op::OPCODE_VS_1_0: break; - case Op::OPCODE_VS_1_1: break; - case Op::OPCODE_VS_2_0: break; - case Op::OPCODE_VS_2_x: break; - case Op::OPCODE_VS_2_sw: break; - case Op::OPCODE_VS_3_0: break; - case Op::OPCODE_VS_3_sw: break; - case Op::OPCODE_DCL: break; - case Op::OPCODE_DEF: break; - case Op::OPCODE_DEFI: break; - case Op::OPCODE_DEFB: break; - case Op::OPCODE_NOP: break; - case Op::OPCODE_ABS: abs(d, s0); break; - case Op::OPCODE_ADD: add(d, s0, s1); break; - case Op::OPCODE_CRS: crs(d, s0, s1); break; - case Op::OPCODE_DP3: dp3(d, s0, s1); break; - case Op::OPCODE_DP4: dp4(d, s0, s1); break; - case Op::OPCODE_DST: dst(d, s0, s1); break; - case Op::OPCODE_EXP: exp(d, s0, pp); break; - case Op::OPCODE_EXPP: expp(d, s0, version); break; - case Op::OPCODE_FRC: frc(d, s0); break; - case Op::OPCODE_LIT: lit(d, s0); break; - case Op::OPCODE_LOG: log(d, s0, pp); break; - case Op::OPCODE_LOGP: logp(d, s0, version); break; - case Op::OPCODE_LRP: lrp(d, s0, s1, s2); break; - case Op::OPCODE_M3X2: M3X2(r, d, s0, src1); break; - case Op::OPCODE_M3X3: M3X3(r, d, s0, src1); break; - case Op::OPCODE_M3X4: M3X4(r, d, s0, src1); break; - case Op::OPCODE_M4X3: M4X3(r, d, s0, src1); break; - case Op::OPCODE_M4X4: M4X4(r, d, s0, src1); break; - case Op::OPCODE_MAD: mad(d, s0, s1, s2); break; - case Op::OPCODE_MAX: max(d, s0, s1); break; - case Op::OPCODE_MIN: min(d, s0, s1); break; - case Op::OPCODE_MOV: mov(d, s0, integer); break; - case Op::OPCODE_MOVA: mov(d, s0); break; - case Op::OPCODE_MUL: mul(d, s0, s1); break; - case Op::OPCODE_NRM: nrm(d, s0, pp); break; - case Op::OPCODE_POW: pow(d, s0, s1, pp); break; - case 
Op::OPCODE_RCP: rcp(d, s0, pp); break; - case Op::OPCODE_RSQ: rsq(d, s0, pp); break; - case Op::OPCODE_SGE: sge(d, s0, s1); break; - case Op::OPCODE_SGN: sgn(d, s0); break; - case Op::OPCODE_SINCOS: sincos(d, s0, pp); break; - case Op::OPCODE_SLT: slt(d, s0, s1); break; - case Op::OPCODE_SUB: sub(d, s0, s1); break; - case Op::OPCODE_BREAK: BREAK(r); break; - case Op::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break; - case Op::OPCODE_BREAKP: BREAKP(r, src0); break; - case Op::OPCODE_CALL: CALL(r, dest.index); break; - case Op::OPCODE_CALLNZ: CALLNZ(r, dest.index, src0); break; - case Op::OPCODE_ELSE: ELSE(r); break; - case Op::OPCODE_ENDIF: ENDIF(r); break; - case Op::OPCODE_ENDLOOP: ENDLOOP(r); break; - case Op::OPCODE_ENDREP: ENDREP(r); break; - case Op::OPCODE_IF: IF(r, src0); break; - case Op::OPCODE_IFC: IFC(r, s0, s1, control); break; - case Op::OPCODE_LABEL: LABEL(dest.index); break; - case Op::OPCODE_LOOP: LOOP(r, src1); break; - case Op::OPCODE_REP: REP(r, src0); break; - case Op::OPCODE_RET: RET(r); break; - case Op::OPCODE_SETP: setp(d, s0, s1, control); break; - case Op::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1); break; - case Op::OPCODE_END: break; + case Shader::OPCODE_VS_1_0: break; + case Shader::OPCODE_VS_1_1: break; + case Shader::OPCODE_VS_2_0: break; + case Shader::OPCODE_VS_2_x: break; + case Shader::OPCODE_VS_2_sw: break; + case Shader::OPCODE_VS_3_0: break; + case Shader::OPCODE_VS_3_sw: break; + case Shader::OPCODE_DCL: break; + case Shader::OPCODE_DEF: break; + case Shader::OPCODE_DEFI: break; + case Shader::OPCODE_DEFB: break; + case Shader::OPCODE_NOP: break; + case Shader::OPCODE_ABS: abs(d, s0); break; + case Shader::OPCODE_ADD: add(d, s0, s1); break; + case Shader::OPCODE_CRS: crs(d, s0, s1); break; + case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; + case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; + case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; + case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, 
s2); break; + case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; + case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; + case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; + case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; + case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; + case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; + case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; + case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; + case Shader::OPCODE_DP1: dp1(d, s0, s1); break; + case Shader::OPCODE_DP2: dp2(d, s0, s1); break; + case Shader::OPCODE_DP3: dp3(d, s0, s1); break; + case Shader::OPCODE_DP4: dp4(d, s0, s1); break; + case Shader::OPCODE_ATT: att(d, s0, s1); break; + case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; + case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; + case Shader::OPCODE_EXPP: expp(d, s0, version); break; + case Shader::OPCODE_EXP: exp(d, s0, pp); break; + case Shader::OPCODE_FRC: frc(d, s0); break; + case Shader::OPCODE_TRUNC: trunc(d, s0); break; + case Shader::OPCODE_FLOOR: floor(d, s0); break; + case Shader::OPCODE_CEIL: ceil(d, s0); break; + case Shader::OPCODE_LIT: lit(d, s0); break; + case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; + case Shader::OPCODE_LOG2: log2(d, s0, pp); break; + case Shader::OPCODE_LOGP: logp(d, s0, version); break; + case Shader::OPCODE_LOG: log(d, s0, pp); break; + case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; + case Shader::OPCODE_STEP: step(d, s0, s1); break; + case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; + case Shader::OPCODE_M3X2: M3X2(r, d, s0, src1); break; + case Shader::OPCODE_M3X3: M3X3(r, d, s0, src1); break; + case Shader::OPCODE_M3X4: M3X4(r, d, s0, src1); break; + case Shader::OPCODE_M4X3: M4X3(r, d, s0, src1); break; + case Shader::OPCODE_M4X4: M4X4(r, d, s0, src1); break; + case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; + case Shader::OPCODE_MAX: max(d, s0, s1); break; + case 
Shader::OPCODE_MIN: min(d, s0, s1); break; + case Shader::OPCODE_MOV: mov(d, s0, integer); break; + case Shader::OPCODE_MOVA: mov(d, s0); break; + case Shader::OPCODE_F2B: f2b(d, s0); break; + case Shader::OPCODE_B2F: b2f(d, s0); break; + case Shader::OPCODE_MUL: mul(d, s0, s1); break; + case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; + case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; + case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; + case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; + case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; + case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; + case Shader::OPCODE_DIV: div(d, s0, s1); break; + case Shader::OPCODE_MOD: mod(d, s0, s1); break; + case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; + case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; + case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; + case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; + case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; + case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; + case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; + case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; + case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; + case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; + case Shader::OPCODE_SGE: step(d, s1, s0); break; + case Shader::OPCODE_SGN: sgn(d, s0); break; + case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; + case Shader::OPCODE_COS: cos(d, s0, pp); break; + case Shader::OPCODE_SIN: sin(d, s0, pp); break; + case Shader::OPCODE_TAN: tan(d, s0); break; + case Shader::OPCODE_ACOS: acos(d, s0); break; + case Shader::OPCODE_ASIN: asin(d, s0); break; + case Shader::OPCODE_ATAN: atan(d, s0); break; + case Shader::OPCODE_ATAN2: atan2(d, s0, s1); break; + case Shader::OPCODE_SLT: slt(d, s0, s1); break; + case Shader::OPCODE_SUB: sub(d, s0, s1); break; + case Shader::OPCODE_BREAK: BREAK(r); break; + case Shader::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break; + case Shader::OPCODE_BREAKP: 
BREAKP(r, src0); break; + case Shader::OPCODE_CONTINUE: CONTINUE(r); break; + case Shader::OPCODE_TEST: TEST(); break; + case Shader::OPCODE_CALL: CALL(r, dst.label, dst.callSite); break; + case Shader::OPCODE_CALLNZ: CALLNZ(r, dst.label, dst.callSite, src0); break; + case Shader::OPCODE_ELSE: ELSE(r); break; + case Shader::OPCODE_ENDIF: ENDIF(r); break; + case Shader::OPCODE_ENDLOOP: ENDLOOP(r); break; + case Shader::OPCODE_ENDREP: ENDREP(r); break; + case Shader::OPCODE_ENDWHILE: ENDWHILE(r); break; + case Shader::OPCODE_IF: IF(r, src0); break; + case Shader::OPCODE_IFC: IFC(r, s0, s1, control); break; + case Shader::OPCODE_LABEL: LABEL(dst.index); break; + case Shader::OPCODE_LOOP: LOOP(r, src1); break; + case Shader::OPCODE_REP: REP(r, src0); break; + case Shader::OPCODE_WHILE: WHILE(r, src0); break; + case Shader::OPCODE_RET: RET(r); break; + case Shader::OPCODE_LEAVE: LEAVE(r); break; + case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; + case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; + case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; + case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; + case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; + case Shader::OPCODE_ALL: all(d.x, s0); break; + case Shader::OPCODE_ANY: any(d.x, s0); break; + case Shader::OPCODE_NOT: not(d, s0); break; + case Shader::OPCODE_OR: or(d.x, s0.x, s1.x); break; + case Shader::OPCODE_XOR: xor(d.x, s0.x, s1.x); break; + case Shader::OPCODE_AND: and(d.x, s0.x, s1.x); break; + case Shader::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1); break; + case Shader::OPCODE_TEX: TEX(r, d, s0, src1); break; + case Shader::OPCODE_END: break; default: ASSERT(false); } - if(dest.type != Dst::PARAMETER_VOID && dest.type != Dst::PARAMETER_LABEL) + if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP) { - if(dest.saturate) + if(dst.integer) { - if(dest.x) d.r = Max(d.r, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - if(dest.y) 
d.g = Max(d.g, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - if(dest.z) d.b = Max(d.b, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - if(dest.w) d.a = Max(d.a, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - - if(dest.x) d.r = Min(d.r, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - if(dest.y) d.g = Min(d.g, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - if(dest.z) d.b = Min(d.b, Float4(1.0f, 1.0f, 1.0f, 1.0f)); - if(dest.w) d.a = Min(d.a, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + switch(opcode) + { + case Shader::OPCODE_DIV: + if(dst.x) d.x = Trunc(d.x); + if(dst.y) d.y = Trunc(d.y); + if(dst.z) d.z = Trunc(d.z); + if(dst.w) d.w = Trunc(d.w); + break; + default: + break; // No truncation to integer required when arguments are integer + } } - if(vertexShader->containsDynamicBranching()) + if(dst.saturate) { - Color4f pDst; // FIXME: Rename + if(dst.x) d.x = Max(d.x, Float4(0.0f)); + if(dst.y) d.y = Max(d.y, Float4(0.0f)); + if(dst.z) d.z = Max(d.z, Float4(0.0f)); + if(dst.w) d.w = Max(d.w, Float4(0.0f)); - switch(dest.type) + if(dst.x) d.x = Min(d.x, Float4(1.0f)); + if(dst.y) d.y = Min(d.y, Float4(1.0f)); + if(dst.z) d.z = Min(d.z, Float4(1.0f)); + if(dst.w) d.w = Min(d.w, Float4(1.0f)); + } + + if(shader->containsDynamicBranching()) + { + Vector4f pDst; // FIXME: Rename + + switch(dst.type) { - case Dst::PARAMETER_VOID: break; - case Dst::PARAMETER_TEMP: pDst = r.r[dest.index]; break; - case Dst::PARAMETER_ADDR: pDst = r.a0; break; - case Dst::PARAMETER_RASTOUT: - switch(dest.index) + case Shader::PARAMETER_VOID: break; + case Shader::PARAMETER_TEMP: + if(dst.rel.type == Shader::PARAMETER_VOID) + { + if(dst.x) pDst.x = r.r[dst.index].x; + if(dst.y) pDst.y = r.r[dst.index].y; + if(dst.z) pDst.z = r.r[dst.index].z; + if(dst.w) pDst.w = r.r[dst.index].w; + } + else + { + Int a = relativeAddress(r, dst); + + if(dst.x) pDst.x = r.r[dst.index + a].x; + if(dst.y) pDst.y = r.r[dst.index + a].y; + if(dst.z) pDst.z = r.r[dst.index + a].z; + if(dst.w) pDst.w = r.r[dst.index + a].w; + } + break; + case Shader::PARAMETER_ADDR: pDst = r.a0; 
break; + case Shader::PARAMETER_RASTOUT: + switch(dst.index) { case 0: - if(dest.x) pDst.x = r.ox[Pos]; - if(dest.y) pDst.y = r.oy[Pos]; - if(dest.z) pDst.z = r.oz[Pos]; - if(dest.w) pDst.w = r.ow[Pos]; + if(dst.x) pDst.x = r.o[Pos].x; + if(dst.y) pDst.y = r.o[Pos].y; + if(dst.z) pDst.z = r.o[Pos].z; + if(dst.w) pDst.w = r.o[Pos].w; break; case 1: - pDst.x = r.ox[Fog]; + pDst.x = r.o[Fog].x; break; case 2: - pDst.x = r.oy[Pts]; + pDst.x = r.o[Pts].y; break; default: ASSERT(false); } break; - case Dst::PARAMETER_ATTROUT: - if(dest.x) pDst.x = r.ox[D0 + dest.index]; - if(dest.y) pDst.y = r.oy[D0 + dest.index]; - if(dest.z) pDst.z = r.oz[D0 + dest.index]; - if(dest.w) pDst.w = r.ow[D0 + dest.index]; + case Shader::PARAMETER_ATTROUT: + if(dst.x) pDst.x = r.o[D0 + dst.index].x; + if(dst.y) pDst.y = r.o[D0 + dst.index].y; + if(dst.z) pDst.z = r.o[D0 + dst.index].z; + if(dst.w) pDst.w = r.o[D0 + dst.index].w; break; - case Dst::PARAMETER_TEXCRDOUT: - // case Dst::PARAMETER_OUTPUT: + case Shader::PARAMETER_TEXCRDOUT: + // case Shader::PARAMETER_OUTPUT: if(version < 0x0300) { - if(dest.x) pDst.x = r.ox[T0 + dest.index]; - if(dest.y) pDst.y = r.oy[T0 + dest.index]; - if(dest.z) pDst.z = r.oz[T0 + dest.index]; - if(dest.w) pDst.w = r.ow[T0 + dest.index]; + if(dst.x) pDst.x = r.o[T0 + dst.index].x; + if(dst.y) pDst.y = r.o[T0 + dst.index].y; + if(dst.z) pDst.z = r.o[T0 + dst.index].z; + if(dst.w) pDst.w = r.o[T0 + dst.index].w; } else { - if(!dest.relative) + if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative { - if(dest.x) pDst.x = r.ox[dest.index]; - if(dest.y) pDst.y = r.oy[dest.index]; - if(dest.z) pDst.z = r.oz[dest.index]; - if(dest.w) pDst.w = r.ow[dest.index]; + if(dst.x) pDst.x = r.o[dst.index].x; + if(dst.y) pDst.y = r.o[dst.index].y; + if(dst.z) pDst.z = r.o[dst.index].z; + if(dst.w) pDst.w = r.o[dst.index].w; } - else + else if(dst.rel.type == Shader::PARAMETER_LOOP) { Int aL = r.aL[r.loopDepth]; - if(dest.x) pDst.x = r.ox[dest.index + aL]; - if(dest.y) 
pDst.y = r.oy[dest.index + aL]; - if(dest.z) pDst.z = r.oz[dest.index + aL]; - if(dest.w) pDst.w = r.ow[dest.index + aL]; + if(dst.x) pDst.x = r.o[dst.index + aL].x; + if(dst.y) pDst.y = r.o[dst.index + aL].y; + if(dst.z) pDst.z = r.o[dst.index + aL].z; + if(dst.w) pDst.w = r.o[dst.index + aL].w; + } + else + { + Int a = relativeAddress(r, dst); + + if(dst.x) pDst.x = r.o[dst.index + a].x; + if(dst.y) pDst.y = r.o[dst.index + a].y; + if(dst.z) pDst.z = r.o[dst.index + a].z; + if(dst.w) pDst.w = r.o[dst.index + a].w; } } break; - case Dst::PARAMETER_LABEL: break; - case Dst::PARAMETER_PREDICATE: pDst = r.p0; break; - case Dst::PARAMETER_INPUT: break; + case Shader::PARAMETER_LABEL: break; + case Shader::PARAMETER_PREDICATE: pDst = r.p0; break; + case Shader::PARAMETER_INPUT: break; default: ASSERT(false); } - Int4 enable = r.enableStack[r.enableIndex] & r.enableBreak; + Int4 enable = enableMask(r, instruction); Int4 xEnable = enable; Int4 yEnable = enable; @@ -358,119 +383,140 @@ if(predicate) { - unsigned char pSwizzle = instruction->getPredicateSwizzle(); + unsigned char pSwizzle = instruction->predicateSwizzle; Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03]; Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03]; Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03]; Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03]; - if(!instruction->isPredicateNot()) + if(!instruction->predicateNot) { - if(dest.x) xEnable = xEnable & As<Int4>(xPredicate); - if(dest.y) yEnable = yEnable & As<Int4>(yPredicate); - if(dest.z) zEnable = zEnable & As<Int4>(zPredicate); - if(dest.w) wEnable = wEnable & As<Int4>(wPredicate); + if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); + if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); + if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); + if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); } else { - if(dest.x) xEnable = xEnable & ~As<Int4>(xPredicate); - if(dest.y) yEnable = yEnable & ~As<Int4>(yPredicate); - if(dest.z) zEnable = zEnable & 
~As<Int4>(zPredicate); - if(dest.w) wEnable = wEnable & ~As<Int4>(wPredicate); + if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); + if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); + if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); + if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); } } - if(dest.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); - if(dest.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); - if(dest.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); - if(dest.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); + if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); + if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); + if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); + if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); - if(dest.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); - if(dest.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); - if(dest.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); - if(dest.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); + if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); + if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); + if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); + if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); } - switch(dest.type) + switch(dst.type) { - case Dst::PARAMETER_VOID: + case Shader::PARAMETER_VOID: break; - case Dst::PARAMETER_TEMP: - if(dest.x) r.r[dest.index].x = d.x; - if(dest.y) r.r[dest.index].y = d.y; - if(dest.z) r.r[dest.index].z = d.z; - if(dest.w) r.r[dest.index].w = d.w; + case Shader::PARAMETER_TEMP: + if(dst.rel.type == Shader::PARAMETER_VOID) + { + if(dst.x) r.r[dst.index].x = d.x; + if(dst.y) r.r[dst.index].y = d.y; + if(dst.z) r.r[dst.index].z = d.z; + if(dst.w) r.r[dst.index].w = d.w; + } + else + { + Int a = relativeAddress(r, dst); + + if(dst.x) r.r[dst.index + a].x = d.x; + if(dst.y) r.r[dst.index + 
a].y = d.y; + if(dst.z) r.r[dst.index + a].z = d.z; + if(dst.w) r.r[dst.index + a].w = d.w; + } break; - case Dst::PARAMETER_ADDR: - if(dest.x) r.a0.x = d.x; - if(dest.y) r.a0.y = d.y; - if(dest.z) r.a0.z = d.z; - if(dest.w) r.a0.w = d.w; + case Shader::PARAMETER_ADDR: + if(dst.x) r.a0.x = d.x; + if(dst.y) r.a0.y = d.y; + if(dst.z) r.a0.z = d.z; + if(dst.w) r.a0.w = d.w; break; - case Dst::PARAMETER_RASTOUT: - switch(dest.index) + case Shader::PARAMETER_RASTOUT: + switch(dst.index) { case 0: - if(dest.x) r.ox[Pos] = d.x; - if(dest.y) r.oy[Pos] = d.y; - if(dest.z) r.oz[Pos] = d.z; - if(dest.w) r.ow[Pos] = d.w; + if(dst.x) r.o[Pos].x = d.x; + if(dst.y) r.o[Pos].y = d.y; + if(dst.z) r.o[Pos].z = d.z; + if(dst.w) r.o[Pos].w = d.w; break; case 1: - r.ox[Fog] = d.x; + r.o[Fog].x = d.x; break; case 2: - r.oy[Pts] = d.x; + r.o[Pts].y = d.x; break; default: ASSERT(false); } break; - case Dst::PARAMETER_ATTROUT: - if(dest.x) r.ox[D0 + dest.index] = d.x; - if(dest.y) r.oy[D0 + dest.index] = d.y; - if(dest.z) r.oz[D0 + dest.index] = d.z; - if(dest.w) r.ow[D0 + dest.index] = d.w; + case Shader::PARAMETER_ATTROUT: + if(dst.x) r.o[D0 + dst.index].x = d.x; + if(dst.y) r.o[D0 + dst.index].y = d.y; + if(dst.z) r.o[D0 + dst.index].z = d.z; + if(dst.w) r.o[D0 + dst.index].w = d.w; break; - case Dst::PARAMETER_TEXCRDOUT: - // case Dst::PARAMETER_OUTPUT: + case Shader::PARAMETER_TEXCRDOUT: + // case Shader::PARAMETER_OUTPUT: if(version < 0x0300) { - if(dest.x) r.ox[T0 + dest.index] = d.x; - if(dest.y) r.oy[T0 + dest.index] = d.y; - if(dest.z) r.oz[T0 + dest.index] = d.z; - if(dest.w) r.ow[T0 + dest.index] = d.w; + if(dst.x) r.o[T0 + dst.index].x = d.x; + if(dst.y) r.o[T0 + dst.index].y = d.y; + if(dst.z) r.o[T0 + dst.index].z = d.z; + if(dst.w) r.o[T0 + dst.index].w = d.w; } else { - if(!dest.relative) + if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative { - if(dest.x) r.ox[dest.index] = d.x; - if(dest.y) r.oy[dest.index] = d.y; - if(dest.z) r.oz[dest.index] = d.z; - if(dest.w) 
r.ow[dest.index] = d.w; + if(dst.x) r.o[dst.index].x = d.x; + if(dst.y) r.o[dst.index].y = d.y; + if(dst.z) r.o[dst.index].z = d.z; + if(dst.w) r.o[dst.index].w = d.w; } - else + else if(dst.rel.type == Shader::PARAMETER_LOOP) { Int aL = r.aL[r.loopDepth]; - if(dest.x) r.ox[dest.index + aL] = d.x; - if(dest.y) r.oy[dest.index + aL] = d.y; - if(dest.z) r.oz[dest.index + aL] = d.z; - if(dest.w) r.ow[dest.index + aL] = d.w; + if(dst.x) r.o[dst.index + aL].x = d.x; + if(dst.y) r.o[dst.index + aL].y = d.y; + if(dst.z) r.o[dst.index + aL].z = d.z; + if(dst.w) r.o[dst.index + aL].w = d.w; + } + else + { + Int a = relativeAddress(r, dst); + + if(dst.x) r.o[dst.index + a].x = d.x; + if(dst.y) r.o[dst.index + a].y = d.y; + if(dst.z) r.o[dst.index + a].z = d.z; + if(dst.w) r.o[dst.index + a].w = d.w; } } break; - case Dst::PARAMETER_LABEL: break; - case Dst::PARAMETER_PREDICATE: r.p0 = d; break; - case Dst::PARAMETER_INPUT: break; + case Shader::PARAMETER_LABEL: break; + case Shader::PARAMETER_PREDICATE: r.p0 = d; break; + case Shader::PARAMETER_INPUT: break; default: ASSERT(false); } } } - if(returns) + if(currentLabel != -1) { Nucleus::setInsertBlock(returnBlock); } @@ -478,40 +524,40 @@ void VertexProgram::passThrough(Registers &r) { - if(vertexShader) + if(shader) { for(int i = 0; i < 12; i++) { - unsigned char usage = vertexShader->output[i][0].usage; - unsigned char index = vertexShader->output[i][0].index; + unsigned char usage = shader->output[i][0].usage; + unsigned char index = shader->output[i][0].index; switch(usage) { case 0xFF: continue; - case ShaderOperation::USAGE_PSIZE: - r.oy[i] = r.v[i].x; + case Shader::USAGE_PSIZE: + r.o[i].y = r.v[i].x; break; - case ShaderOperation::USAGE_TEXCOORD: - r.ox[i] = r.v[i].x; - r.oy[i] = r.v[i].y; - r.oz[i] = r.v[i].z; - r.ow[i] = r.v[i].w; + case Shader::USAGE_TEXCOORD: + r.o[i].x = r.v[i].x; + r.o[i].y = r.v[i].y; + r.o[i].z = r.v[i].z; + r.o[i].w = r.v[i].w; break; - case ShaderOperation::USAGE_POSITION: - r.ox[i] = 
r.v[i].x; - r.oy[i] = r.v[i].y; - r.oz[i] = r.v[i].z; - r.ow[i] = r.v[i].w; + case Shader::USAGE_POSITION: + r.o[i].x = r.v[i].x; + r.o[i].y = r.v[i].y; + r.o[i].z = r.v[i].z; + r.o[i].w = r.v[i].w; break; - case ShaderOperation::USAGE_COLOR: - r.ox[i] = r.v[i].x; - r.oy[i] = r.v[i].y; - r.oz[i] = r.v[i].z; - r.ow[i] = r.v[i].w; + case Shader::USAGE_COLOR: + r.o[i].x = r.v[i].x; + r.o[i].y = r.v[i].y; + r.o[i].z = r.v[i].z; + r.o[i].w = r.v[i].w; break; - case ShaderOperation::USAGE_FOG: - r.ox[i] = r.v[i].x; + case Shader::USAGE_FOG: + r.o[i].x = r.v[i].x; break; default: ASSERT(false); @@ -520,60 +566,99 @@ } else { - r.ox[Pos] = r.v[PositionT].x; - r.oy[Pos] = r.v[PositionT].y; - r.oz[Pos] = r.v[PositionT].z; - r.ow[Pos] = r.v[PositionT].w; + r.o[Pos].x = r.v[PositionT].x; + r.o[Pos].y = r.v[PositionT].y; + r.o[Pos].z = r.v[PositionT].z; + r.o[Pos].w = r.v[PositionT].w; for(int i = 0; i < 2; i++) { - r.ox[D0 + i] = r.v[Color0 + i].x; - r.oy[D0 + i] = r.v[Color0 + i].y; - r.oz[D0 + i] = r.v[Color0 + i].z; - r.ow[D0 + i] = r.v[Color0 + i].w; + r.o[D0 + i].x = r.v[Color0 + i].x; + r.o[D0 + i].y = r.v[Color0 + i].y; + r.o[D0 + i].z = r.v[Color0 + i].z; + r.o[D0 + i].w = r.v[Color0 + i].w; } for(int i = 0; i < 8; i++) { - r.ox[T0 + i] = r.v[TexCoord0 + i].x; - r.oy[T0 + i] = r.v[TexCoord0 + i].y; - r.oz[T0 + i] = r.v[TexCoord0 + i].z; - r.ow[T0 + i] = r.v[TexCoord0 + i].w; + r.o[T0 + i].x = r.v[TexCoord0 + i].x; + r.o[T0 + i].y = r.v[TexCoord0 + i].y; + r.o[T0 + i].z = r.v[TexCoord0 + i].z; + r.o[T0 + i].w = r.v[TexCoord0 + i].w; } - r.oy[Pts] = r.v[PSize].x; + r.o[Pts].y = r.v[PSize].x; } } - Color4f VertexProgram::reg(Registers &r, const Src &src, int offset) + Vector4f VertexProgram::reg(Registers &r, const Src &src, int offset) { int i = src.index + offset; - Color4f reg; + Vector4f reg; - if(src.type == Src::PARAMETER_CONST) - { - reg = readConstant(r, src, offset); - } - switch(src.type) { - case Src::PARAMETER_TEMP: reg = r.r[i]; break; - case 
Src::PARAMETER_CONST: break; - case Src::PARAMETER_INPUT: reg = r.v[i]; break; - case Src::PARAMETER_VOID: return r.r[0]; // Dummy - case Src::PARAMETER_FLOATLITERAL: return r.r[0]; // Dummy - case Src::PARAMETER_ADDR: reg = r.a0; break; - case Src::PARAMETER_CONSTBOOL: return r.r[0]; // Dummy - case Src::PARAMETER_CONSTINT: return r.r[0]; // Dummy - case Src::PARAMETER_LOOP: return r.r[0]; // Dummy - case Src::PARAMETER_PREDICATE: return r.r[0]; // Dummy - case Src::PARAMETER_SAMPLER: return r.r[0]; // Dummy + case Shader::PARAMETER_TEMP: + if(src.rel.type == Shader::PARAMETER_VOID) + { + reg = r.r[i]; + } + else + { + reg = r.r[i + relativeAddress(r, src)]; + } + break; + case Shader::PARAMETER_CONST: + reg = readConstant(r, src, offset); + break; + case Shader::PARAMETER_INPUT: + if(src.rel.type == Shader::PARAMETER_VOID) + { + reg = r.v[i]; + } + else + { + reg = r.v[i + relativeAddress(r, src)]; + } + break; + case Shader::PARAMETER_VOID: return r.r[0]; // Dummy + case Shader::PARAMETER_FLOAT4LITERAL: + reg.x = Float4(src.value[0]); + reg.y = Float4(src.value[1]); + reg.z = Float4(src.value[2]); + reg.w = Float4(src.value[3]); + break; + case Shader::PARAMETER_ADDR: reg = r.a0; break; + case Shader::PARAMETER_CONSTBOOL: return r.r[0]; // Dummy + case Shader::PARAMETER_CONSTINT: return r.r[0]; // Dummy + case Shader::PARAMETER_LOOP: return r.r[0]; // Dummy + case Shader::PARAMETER_PREDICATE: return r.r[0]; // Dummy + case Shader::PARAMETER_SAMPLER: + if(src.rel.type == Shader::PARAMETER_VOID) + { + reg.x = As<Float4>(Int4(i)); + } + else if(src.rel.type == Shader::PARAMETER_TEMP) + { + reg.x = As<Float4>(Int4(i) + RoundInt(r.r[src.rel.index].x)); + } + return reg; + case Shader::PARAMETER_OUTPUT: + if(src.rel.type == Shader::PARAMETER_VOID) + { + reg = r.o[i]; + } + else + { + reg = r.o[i + relativeAddress(r, src)]; + } + break; default: ASSERT(false); } - Color4f mod; + Vector4f mod; mod.x = reg[(src.swizzle >> 0) & 0x03]; mod.y = reg[(src.swizzle >> 2) & 
0x03]; @@ -582,54 +667,54 @@ switch(src.modifier) { - case Src::MODIFIER_NONE: + case Shader::MODIFIER_NONE: break; - case Src::MODIFIER_NEGATE: + case Shader::MODIFIER_NEGATE: mod.x = -mod.x; mod.y = -mod.y; mod.z = -mod.z; mod.w = -mod.w; break; - case Src::MODIFIER_BIAS: + case Shader::MODIFIER_BIAS: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_BIAS_NEGATE: + case Shader::MODIFIER_BIAS_NEGATE: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_SIGN: + case Shader::MODIFIER_SIGN: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_SIGN_NEGATE: + case Shader::MODIFIER_SIGN_NEGATE: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_COMPLEMENT: + case Shader::MODIFIER_COMPLEMENT: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_X2: + case Shader::MODIFIER_X2: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_X2_NEGATE: + case Shader::MODIFIER_X2_NEGATE: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_DZ: + case Shader::MODIFIER_DZ: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_DW: + case Shader::MODIFIER_DW: ASSERT(false); // NOTE: Unimplemented break; - case Src::MODIFIER_ABS: + case Shader::MODIFIER_ABS: mod.x = Abs(mod.x); mod.y = Abs(mod.y); mod.z = Abs(mod.z); mod.w = Abs(mod.w); break; - case Src::MODIFIER_ABS_NEGATE: + case Shader::MODIFIER_ABS_NEGATE: mod.x = -Abs(mod.x); mod.y = -Abs(mod.y); mod.z = -Abs(mod.z); mod.w = -Abs(mod.w); break; - case Src::MODIFIER_NOT: + case Shader::MODIFIER_NOT: UNIMPLEMENTED(); break; default: @@ -639,32 +724,177 @@ return mod; } - void VertexProgram::M3X2(Registers &r, Color4f &dst, Color4f &src0, Src &src1) + Vector4f VertexProgram::readConstant(Registers &r, const Src &src, int offset) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); + Vector4f c; + + int i = src.index + offset; + + if(src.rel.type == Shader::PARAMETER_VOID) // Not relative + { + c.x = c.y = c.z = c.w 
= *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i])); + + c.x = c.x.xxxx; + c.y = c.y.yyyy; + c.z = c.z.zzzz; + c.w = c.w.wwww; + + if(localShaderConstants) // Constant may be known at compile time + { + for(int j = 0; j < shader->getLength(); j++) + { + const Shader::Instruction &instruction = *shader->getInstruction(j); + + if(instruction.opcode == Shader::OPCODE_DEF) + { + if(instruction.dst.index == i) + { + c.x = Float4(instruction.src[0].value[0]); + c.y = Float4(instruction.src[0].value[1]); + c.z = Float4(instruction.src[0].value[2]); + c.w = Float4(instruction.src[0].value[3]); + + break; + } + } + } + } + } + else if(src.rel.type == Shader::PARAMETER_LOOP) + { + Int loopCounter = r.aL[r.loopDepth]; + + c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16); + + c.x = c.x.xxxx; + c.y = c.y.yyyy; + c.z = c.z.zzzz; + c.w = c.w.wwww; + } + else + { + if(src.rel.deterministic) + { + Int a = relativeAddress(r, src); + + c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + a * 16); + + c.x = c.x.xxxx; + c.y = c.y.yyyy; + c.z = c.z.zzzz; + c.w = c.w.wwww; + } + else + { + int component = src.rel.swizzle & 0x03; + Float4 a; + + switch(src.rel.type) + { + case Shader::PARAMETER_ADDR: a = r.a0[component]; break; + case Shader::PARAMETER_TEMP: a = r.r[src.rel.index][component]; break; + case Shader::PARAMETER_INPUT: a = r.v[src.rel.index][component]; break; + case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break; + case Shader::PARAMETER_CONST: a = Float4(*Pointer<Float>(r.data + OFFSET(DrawData,vs.c[src.rel.index][component]))); break; + default: ASSERT(false); + } + + Int4 index = Int4(i) + RoundInt(a) * Int4(src.rel.scale); + + index = Min(As<UInt4>(index), UInt4(256)); // Clamp to constant register range, c[256] = {0, 0, 0, 0} + + Int index0 = Extract(index, 0); + Int index1 = Extract(index, 1); + Int index2 = Extract(index, 2); + Int index3 = Extract(index, 3); + + c.x = 
*Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16); + c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16); + c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16); + c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16); + + transpose4x4(c.x, c.y, c.z, c.w); + } + } + + return c; + } + + Int VertexProgram::relativeAddress(Registers &r, const Shader::Parameter &var) + { + ASSERT(var.rel.deterministic); + + if(var.rel.type == Shader::PARAMETER_TEMP) + { + return RoundInt(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale; + } + else if(var.rel.type == Shader::PARAMETER_INPUT) + { + return RoundInt(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale; + } + else if(var.rel.type == Shader::PARAMETER_OUTPUT) + { + return RoundInt(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale; + } + else if(var.rel.type == Shader::PARAMETER_CONST) + { + RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[var.rel.index])); + + return RoundInt(Extract(c, 0)) * var.rel.scale; + } + else ASSERT(false); + + return 0; + } + + Int4 VertexProgram::enableMask(Registers &r, const Shader::Instruction *instruction) + { + Int4 enable = instruction->analysisBranch ? 
Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF); + + if(shader->containsBreakInstruction() && !whileTest && instruction->analysisBreak) + { + enable &= r.enableBreak; + } + + if(shader->containsContinueInstruction() && !whileTest && instruction->analysisContinue) + { + enable &= r.enableContinue; + } + + if(shader->containsLeaveInstruction() && instruction->analysisLeave) + { + enable &= r.enableLeave; + } + + return enable; + } + + void VertexProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1) + { + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); dst.x = dot3(src0, row0); dst.y = dot3(src0, row1); } - void VertexProgram::M3X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1) + void VertexProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); dst.x = dot3(src0, row0); dst.y = dot3(src0, row1); dst.z = dot3(src0, row2); } - void VertexProgram::M3X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1) + void VertexProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); - Color4f row3 = reg(r, src1, 3); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); + Vector4f row3 = reg(r, src1, 3); dst.x = dot3(src0, row0); dst.y = dot3(src0, row1); @@ -672,23 +902,23 @@ dst.w = dot3(src0, row3); } - void VertexProgram::M4X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1) + void VertexProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f 
row2 = reg(r, src1, 2); dst.x = dot4(src0, row0); dst.y = dot4(src0, row1); dst.z = dot4(src0, row2); } - void VertexProgram::M4X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1) + void VertexProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1) { - Color4f row0 = reg(r, src1, 0); - Color4f row1 = reg(r, src1, 1); - Color4f row2 = reg(r, src1, 2); - Color4f row3 = reg(r, src1, 3); + Vector4f row0 = reg(r, src1, 0); + Vector4f row1 = reg(r, src1, 1); + Vector4f row2 = reg(r, src1, 2); + Vector4f row3 = reg(r, src1, 3); dst.x = dot4(src0, row0); dst.y = dot4(src0, row1); @@ -703,6 +933,7 @@ if(breakDepth == 0) { + r.enableIndex = r.enableIndex - breakDepth; Nucleus::createBr(endBlock); } else @@ -710,49 +941,47 @@ r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex]; Bool allBreak = SignMask(r.enableBreak) == 0x0; + r.enableIndex = r.enableIndex - breakDepth; branch(allBreak, endBlock, deadBlock); } Nucleus::setInsertBlock(deadBlock); + r.enableIndex = r.enableIndex + breakDepth; } - void VertexProgram::BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control control) + void VertexProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control) { Int4 condition; switch(control) { - case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; - case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; - case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; - case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; - case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; - case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; + case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; + case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; + case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; + case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; + case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; + case 
Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; default: ASSERT(false); } - condition &= r.enableStack[r.enableIndex]; - - llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); - llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; - - r.enableBreak = r.enableBreak & ~condition; - Bool allBreak = SignMask(r.enableBreak) == 0x0; - - branch(allBreak, endBlock, continueBlock); - Nucleus::setInsertBlock(continueBlock); + BREAK(r, condition); } void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC { Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); - if(predicateRegister.modifier == Src::MODIFIER_NOT) + if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition = ~condition; } + BREAK(r, condition); + } + + void VertexProgram::BREAK(Registers &r, Int4 &condition) + { condition &= r.enableStack[r.enableIndex]; llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); @@ -761,44 +990,61 @@ r.enableBreak = r.enableBreak & ~condition; Bool allBreak = SignMask(r.enableBreak) == 0x0; + r.enableIndex = r.enableIndex - breakDepth; branch(allBreak, endBlock, continueBlock); + Nucleus::setInsertBlock(continueBlock); + r.enableIndex = r.enableIndex + breakDepth; } - void VertexProgram::CALL(Registers &r, int labelIndex) + void VertexProgram::CONTINUE(Registers &r) + { + r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex]; + } + + void VertexProgram::TEST() + { + whileTest = true; + } + + void VertexProgram::CALL(Registers &r, int labelIndex, int callSiteIndex) { if(!labelBlock[labelIndex]) { labelBlock[labelIndex] = Nucleus::createBasicBlock(); } - llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); - callRetBlock.push_back(retBlock); + if(callRetBlock[labelIndex].size() > 1) + { + r.callStack[r.stackIndex++] = UInt(callSiteIndex); + } - r.callStack[r.stackIndex++] = UInt((unsigned int)callRetBlock.size() - 1); // FIXME + Int4 
restoreLeave = r.enableLeave; Nucleus::createBr(labelBlock[labelIndex]); - Nucleus::setInsertBlock(retBlock); + Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); + + r.enableLeave = restoreLeave; } - void VertexProgram::CALLNZ(Registers &r, int labelIndex, const Src &src) + void VertexProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src) { - if(src.type == Src::PARAMETER_CONSTBOOL) + if(src.type == Shader::PARAMETER_CONSTBOOL) { - CALLNZb(r, labelIndex, src); + CALLNZb(r, labelIndex, callSiteIndex, src); } - else if(src.type == Src::PARAMETER_PREDICATE) + else if(src.type == Shader::PARAMETER_PREDICATE) { - CALLNZp(r, labelIndex, src); + CALLNZp(r, labelIndex, callSiteIndex, src); } else ASSERT(false); } - void VertexProgram::CALLNZb(Registers &r, int labelIndex, const Src &boolRegister) + void VertexProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister) { Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME - if(boolRegister.modifier == Src::MODIFIER_NOT) + if(boolRegister.modifier == Shader::MODIFIER_NOT) { condition = !condition; } @@ -808,20 +1054,24 @@ labelBlock[labelIndex] = Nucleus::createBasicBlock(); } - llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); - callRetBlock.push_back(retBlock); + if(callRetBlock[labelIndex].size() > 1) + { + r.callStack[r.stackIndex++] = UInt(callSiteIndex); + } - r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME + Int4 restoreLeave = r.enableLeave; - branch(condition, labelBlock[labelIndex], retBlock); - Nucleus::setInsertBlock(retBlock); + branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); + Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); + + r.enableLeave = restoreLeave; } - void VertexProgram::CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister) + void VertexProgram::CALLNZp(Registers &r, int 
labelIndex, int callSiteIndex, const Src &predicateRegister) { Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); - if(predicateRegister.modifier == Src::MODIFIER_NOT) + if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition = ~condition; } @@ -833,20 +1083,21 @@ labelBlock[labelIndex] = Nucleus::createBasicBlock(); } - llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); - callRetBlock.push_back(retBlock); - - r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME + if(callRetBlock[labelIndex].size() > 1) + { + r.callStack[r.stackIndex++] = UInt(callSiteIndex); + } r.enableIndex++; r.enableStack[r.enableIndex] = condition; + Int4 restoreLeave = r.enableLeave; - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; - - branch(notAllFalse, labelBlock[labelIndex], retBlock); - Nucleus::setInsertBlock(retBlock); + Bool notAllFalse = SignMask(condition) != 0; + branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); + Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); r.enableIndex--; + r.enableLeave = restoreLeave; } void VertexProgram::ELSE(Registers &r) @@ -859,7 +1110,7 @@ if(isConditionalIf[ifDepth]) { Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1]; - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; + Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, falseBlock, endBlock); @@ -892,20 +1143,6 @@ } } - void VertexProgram::ENDREP(Registers &r) - { - loopRepDepth--; - - llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; - llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; - - Nucleus::createBr(testBlock); - Nucleus::setInsertBlock(endBlock); - - r.loopDepth--; - r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - } - void VertexProgram::ENDLOOP(Registers &r) { loopRepDepth--; @@ -922,17 +1159,50 @@ r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF); } + void VertexProgram::ENDREP(Registers &r) + { + loopRepDepth--; + + llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; + llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; + + Nucleus::createBr(testBlock); + Nucleus::setInsertBlock(endBlock); + + r.loopDepth--; + r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + void VertexProgram::ENDWHILE(Registers &r) + { + loopRepDepth--; + + llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; + llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; + + Nucleus::createBr(testBlock); + Nucleus::setInsertBlock(endBlock); + + r.enableIndex--; + r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + whileTest = false; + } + void VertexProgram::IF(Registers &r, const Src &src) { - if(src.type == Src::PARAMETER_CONSTBOOL) + if(src.type == Shader::PARAMETER_CONSTBOOL) { IFb(r, src); } - else if(src.type == Src::PARAMETER_PREDICATE) + else if(src.type == Shader::PARAMETER_PREDICATE) { IFp(r, src); } - else ASSERT(false); + else + { + Int4 condition = As<Int4>(reg(r, src).x); + IF(r, condition); + } } void VertexProgram::IFb(Registers &r, const Src &boolRegister) @@ -941,9 +1211,9 @@ Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME - if(boolRegister.modifier == Src::MODIFIER_NOT) + if(boolRegister.modifier == Shader::MODIFIER_NOT) { - condition = !condition; + condition = !condition; } llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); @@ -957,50 +1227,39 @@ ifDepth++; } - void VertexProgram::IFp(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with IFC + void VertexProgram::IFp(Registers &r, const Src &predicateRegister) { Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); - if(predicateRegister.modifier == Src::MODIFIER_NOT) + if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition = ~condition; } - condition &= 
r.enableStack[r.enableIndex]; - - r.enableIndex++; - r.enableStack[r.enableIndex] = condition; - - llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); - llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); - - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; - - branch(notAllFalse, trueBlock, falseBlock); - - isConditionalIf[ifDepth] = true; - ifFalseBlock[ifDepth] = falseBlock; - - ifDepth++; - breakDepth++; + IF(r, condition); } - void VertexProgram::IFC(Registers &r, Color4f &src0, Color4f &src1, Control control) + void VertexProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control) { Int4 condition; switch(control) { - case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; - case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; - case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; - case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; - case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; - case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; + case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; + case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; + case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; + case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; + case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; + case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; default: ASSERT(false); } + IF(r, condition); + } + + void VertexProgram::IF(Registers &r, Int4 &condition) + { condition &= r.enableStack[r.enableIndex]; r.enableIndex++; @@ -1009,7 +1268,7 @@ llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); - Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; + Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, trueBlock, falseBlock); @@ -1022,7 +1281,13 @@ void 
VertexProgram::LABEL(int labelIndex) { + if(!labelBlock[labelIndex]) + { + labelBlock[labelIndex] = Nucleus::createBasicBlock(); + } + Nucleus::setInsertBlock(labelBlock[labelIndex]); + currentLabel = labelIndex; } void VertexProgram::LOOP(Registers &r, const Src &integerRegister) @@ -1086,27 +1351,73 @@ breakDepth = 0; } + void VertexProgram::WHILE(Registers &r, const Src &temporaryRegister) + { + r.enableIndex++; + + llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); + llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); + llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); + + loopRepTestBlock[loopRepDepth] = testBlock; + loopRepEndBlock[loopRepDepth] = endBlock; + + Int4 restoreBreak = r.enableBreak; + Int4 restoreContinue = r.enableContinue; + + // FIXME: jump(testBlock) + Nucleus::createBr(testBlock); + Nucleus::setInsertBlock(testBlock); + r.enableContinue = restoreContinue; + + Vector4f &src = reg(r, temporaryRegister); + Int4 condition = As<Int4>(src.x); + condition &= r.enableStack[r.enableIndex - 1]; + r.enableStack[r.enableIndex] = condition; + + Bool notAllFalse = SignMask(condition) != 0; + branch(notAllFalse, loopBlock, endBlock); + + Nucleus::setInsertBlock(endBlock); + r.enableBreak = restoreBreak; + + Nucleus::setInsertBlock(loopBlock); + + loopRepDepth++; + breakDepth = 0; + } + void VertexProgram::RET(Registers &r) { - if(!returns) + if(currentLabel == -1) { returnBlock = Nucleus::createBasicBlock(); Nucleus::createBr(returnBlock); - - returns = true; } else { - // FIXME: Encapsulate - UInt index = r.callStack[--r.stackIndex]; - llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); - llvm::Value *value = Nucleus::createLoad(index.address); - llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock.size()); - for(unsigned int i = 0; i < callRetBlock.size(); i++) + if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack { - 
Nucleus::addSwitchCase(switchInst, i, callRetBlock[i]); + // FIXME: Encapsulate + UInt index = r.callStack[--r.stackIndex]; + + llvm::Value *value = Nucleus::createLoad(index.address); + llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); + + for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) + { + Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]); + } + } + else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination + { + Nucleus::createBr(callRetBlock[currentLabel][0]); + } + else // Function isn't called + { + Nucleus::createBr(unreachableBlock); } Nucleus::setInsertBlock(unreachableBlock); @@ -1114,17 +1425,60 @@ } } - void VertexProgram::TEXLDL(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) + void VertexProgram::LEAVE(Registers &r) { - Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + src1.index * sizeof(Texture); + r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex]; - Color4f tmp; + // FIXME: Return from function if all instances left + // FIXME: Use enableLeave in other control-flow constructs + } - sampler[src1.index]->sampleTexture(texture, tmp, src0.x, src0.y, src0.z, src0.w, src0, src0, false, false, true); + void VertexProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) + { + Vector4f tmp; + sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w); dst.x = tmp[(src1.swizzle >> 0) & 0x3]; dst.y = tmp[(src1.swizzle >> 2) & 0x3]; dst.z = tmp[(src1.swizzle >> 4) & 0x3]; dst.w = tmp[(src1.swizzle >> 6) & 0x3]; } + + void VertexProgram::TEX(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1) + { + Float4 lod = Float4(0.0f); + Vector4f tmp; + sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, lod); + + dst.x = tmp[(src1.swizzle >> 0) & 0x3]; + dst.y = tmp[(src1.swizzle >> 2) & 0x3]; + dst.z = tmp[(src1.swizzle >> 4) & 0x3]; + dst.w = tmp[(src1.swizzle 
>> 6) & 0x3]; + } + + void VertexProgram::sampleTexture(Registers &r, Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q) + { + if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID) + { + Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture); + sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true); + } + else + { + Int index = As<Int>(Float(reg(r, s).x.x)); + + for(int i = 0; i < 16; i++) + { + if(shader->usesSampler(i)) + { + If(index == i) + { + Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture); + sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true); + // FIXME: When the sampler states are the same, we could use one sampler and just index the texture + } + } + } + } + } }
diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp index bae97f3..7423e5b 100644 --- a/src/Shader/VertexProgram.hpp +++ b/src/Shader/VertexProgram.hpp
@@ -1,94 +1,104 @@ -// SwiftShader Software Renderer -// -// Copyright(c) 2005-2011 TransGaming Inc. -// -// All rights reserved. No part of this software may be copied, distributed, transmitted, -// transcribed, stored in a retrieval system, translated into any human or computer -// language by any means, or disclosed to third parties without the explicit written -// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express -// or implied, including but not limited to any patent rights, are granted to you. -// - -#ifndef sw_VertexProgram_hpp -#define sw_VertexProgram_hpp - -#include "VertexRoutine.hpp" -#include "ShaderCore.hpp" - -#include "Stream.hpp" -#include "Types.hpp" - -namespace sw -{ - struct Stream; - class VertexShader; - class SamplerCore; - - class VertexProgram : public VertexRoutine, public ShaderCore - { - public: - VertexProgram(const VertexProcessor::State &state, const VertexShader *vertexShader); - - virtual ~VertexProgram(); - - private: - typedef Shader::Instruction::DestinationParameter Dst; - typedef Shader::Instruction::SourceParameter Src; - typedef Shader::Instruction::Operation Op; - typedef Shader::Instruction::Operation::Control Control; - typedef Shader::Instruction::Operation::Usage Usage; - - Color4f readConstant(Registers &r, const Src &src, int offset = 0); - void pipeline(Registers &r); - void shader(Registers &r); - void passThrough(Registers &r); - - Color4f reg(Registers &r, const Src &src, int offset = 0); - - void M3X2(Registers &r, Color4f &dst, Color4f &src0, Src &src1); - void M3X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1); - void M3X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1); - void M4X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1); - void M4X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1); - void BREAK(Registers &r); - void BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control); - void BREAKP(Registers &r, const Src &predicateRegister); - 
void CALL(Registers &r, int labelIndex); - void CALLNZ(Registers &r, int labelIndex, const Src &src); - void CALLNZb(Registers &r, int labelIndex, const Src &boolRegister); - void CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister); - void ELSE(Registers &r); - void ENDIF(Registers &r); - void ENDLOOP(Registers &r); - void ENDREP(Registers &r); - void IF(Registers &r, const Src &src); - void IFb(Registers &r, const Src &boolRegister); - void IFp(Registers &r, const Src &predicateRegister); - void IFC(Registers &r, Color4f &src0, Color4f &src1, Control); - void LABEL(int labelIndex); - void LOOP(Registers &r, const Src &integerRegister); - void REP(Registers &r, const Src &integerRegister); - void RET(Registers &r); - void TEXLDL(Registers &r, Color4f &dst, Color4f &src, const Src&); - - SamplerCore *sampler[4]; - - bool returns; - int ifDepth; - int loopRepDepth; - int breakDepth; - - // FIXME: Get rid of llvm:: - llvm::BasicBlock *ifFalseBlock[24 + 24]; - llvm::BasicBlock *loopRepTestBlock[4]; - llvm::BasicBlock *loopRepEndBlock[4]; - llvm::BasicBlock *labelBlock[2048]; - std::vector<llvm::BasicBlock*> callRetBlock; - llvm::BasicBlock *returnBlock; - bool isConditionalIf[24 + 24]; - - const VertexShader *const vertexShader; - }; -} - -#endif // sw_VertexProgram_hpp +// SwiftShader Software Renderer +// +// Copyright(c) 2005-2012 TransGaming Inc. +// +// All rights reserved. No part of this software may be copied, distributed, transmitted, +// transcribed, stored in a retrieval system, translated into any human or computer +// language by any means, or disclosed to third parties without the explicit written +// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express +// or implied, including but not limited to any patent rights, are granted to you. 
+// + +#ifndef sw_VertexProgram_hpp +#define sw_VertexProgram_hpp + +#include "VertexRoutine.hpp" +#include "ShaderCore.hpp" + +#include "Stream.hpp" +#include "Types.hpp" + +namespace sw +{ + struct Stream; + class VertexShader; + class SamplerCore; + + class VertexProgram : public VertexRoutine, public ShaderCore + { + public: + VertexProgram(const VertexProcessor::State &state, const VertexShader *vertexShader); + + virtual ~VertexProgram(); + + private: + typedef Shader::DestinationParameter Dst; + typedef Shader::SourceParameter Src; + typedef Shader::Control Control; + typedef Shader::Usage Usage; + + void pipeline(Registers &r); + void program(Registers &r); + void passThrough(Registers &r); + + Vector4f reg(Registers &r, const Src &src, int offset = 0); + Vector4f readConstant(Registers &r, const Src &src, int offset = 0); + Int relativeAddress(Registers &r, const Shader::Parameter &var); + Int4 enableMask(Registers &r, const Shader::Instruction *instruction); + + void M3X2(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1); + void M3X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1); + void M3X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1); + void M4X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1); + void M4X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1); + void BREAK(Registers &r); + void BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control); + void BREAKP(Registers &r, const Src &predicateRegister); + void BREAK(Registers &r, Int4 &condition); + void CONTINUE(Registers &r); + void TEST(); + void CALL(Registers &r, int labelIndex, int callSiteIndex); + void CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src); + void CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister); + void CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister); + void ELSE(Registers &r); + void ENDIF(Registers &r); + void ENDLOOP(Registers &r); + void 
ENDREP(Registers &r); + void ENDWHILE(Registers &r); + void IF(Registers &r, const Src &src); + void IFb(Registers &r, const Src &boolRegister); + void IFp(Registers &r, const Src &predicateRegister); + void IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control); + void IF(Registers &r, Int4 &condition); + void LABEL(int labelIndex); + void LOOP(Registers &r, const Src &integerRegister); + void REP(Registers &r, const Src &integerRegister); + void WHILE(Registers &r, const Src &temporaryRegister); + void RET(Registers &r); + void LEAVE(Registers &r); + void TEXLDL(Registers &r, Vector4f &dst, Vector4f &src, const Src&); + void TEX(Registers &r, Vector4f &dst, Vector4f &src, const Src&); + + void sampleTexture(Registers &r, Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q); + + SamplerCore *sampler[4]; + + int ifDepth; + int loopRepDepth; + int breakDepth; + int currentLabel; + bool whileTest; + + // FIXME: Get rid of llvm:: + llvm::BasicBlock *ifFalseBlock[24 + 24]; + llvm::BasicBlock *loopRepTestBlock[4]; + llvm::BasicBlock *loopRepEndBlock[4]; + llvm::BasicBlock *labelBlock[2048]; + std::vector<llvm::BasicBlock*> callRetBlock[2048]; + llvm::BasicBlock *returnBlock; + bool isConditionalIf[24 + 24]; + }; +} + +#endif // sw_VertexProgram_hpp
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp index f28a741..1c2be07 100644 --- a/src/Shader/VertexRoutine.cpp +++ b/src/Shader/VertexRoutine.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -20,7 +20,10 @@ namespace sw { - VertexRoutine::VertexRoutine(const VertexProcessor::State &state) : state(state) + extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates + extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] + + VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : state(state), shader(shader) { routine = 0; } @@ -46,7 +49,7 @@ UInt count = *Pointer<UInt>(task+ OFFSET(VertexTask,count)); - Registers r; + Registers r(shader); r.data = data; r.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); @@ -82,7 +85,7 @@ Return(); } - routine = function(L"VertexRoutine_%0.16llX", state.shaderHash); + routine = function(L"VertexRoutine_%0.8X", state.shaderID); } Routine *VertexRoutine::getRoutine() @@ -108,41 +111,41 @@ // Backtransform if(state.preTransformed) { - Float4 rhw = Float4(1.0f, 1.0f, 1.0f, 1.0f) / r.ow[pos]; + Float4 rhw = Float4(1.0f) / r.o[pos].w; - Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,WWWWx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f); - Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,HHHHx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f); - Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,LLLLx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f); - Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,TTTTx16)) * Float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f); + Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); + Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 
16.0f); + Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); + Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); - r.ox[pos] = (r.ox[pos] - L) / W * rhw; - r.oy[pos] = (r.oy[pos] - T) / H * rhw; - r.oz[pos] = r.oz[pos] * rhw; - r.ow[pos] = rhw; + r.o[pos].x = (r.o[pos].x - L) / W * rhw; + r.o[pos].y = (r.o[pos].y - T) / H * rhw; + r.o[pos].z = r.o[pos].z * rhw; + r.o[pos].w = rhw; } if(state.superSampling) { - r.ox[pos] = r.ox[pos] + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.ow[pos]; - r.oy[pos] = r.oy[pos] + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.ow[pos]; + r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w; + r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w; } - Float4 clipX = r.ox[pos]; - Float4 clipY = r.oy[pos]; + Float4 clipX = r.o[pos].x; + Float4 clipY = r.o[pos].y; if(state.multiSampling) // Clip at pixel edges instead of pixel centers { - clipX += *Pointer<Float4>(r.data + OFFSET(DrawData,offX)) * r.ow[pos]; - clipY += *Pointer<Float4>(r.data + OFFSET(DrawData,offY)) * r.ow[pos]; + clipX += *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w; + clipY += *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w; } - Int4 maxX = CmpLT(r.ow[pos], clipX); - Int4 maxY = CmpLT(r.ow[pos], clipY); - Int4 maxZ = CmpLT(r.ow[pos], r.oz[pos]); + Int4 maxX = CmpLT(r.o[pos].w, clipX); + Int4 maxY = CmpLT(r.o[pos].w, clipY); + Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z); - Int4 minX = CmpNLE(-r.ow[pos], clipX); - Int4 minY = CmpNLE(-r.ow[pos], clipY); - Int4 minZ = CmpNLE(Float4(0.0f, 0.0f, 0.0f, 0.0f), r.oz[pos]); + Int4 minX = CmpNLE(-r.o[pos].w, clipX); + Int4 minY = CmpNLE(-r.o[pos].w, clipY); + Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z); Int flags; @@ -159,9 +162,9 @@ flags = SignMask(minZ); r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minZ) + flags * 4); - 
Int4 finiteX = CmpLE(Abs(r.ox[pos]), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); - Int4 finiteY = CmpLE(Abs(r.oy[pos]), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); - Int4 finiteZ = CmpLE(Abs(r.oz[pos]), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); + Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); + Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); + Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos))); flags = SignMask(finiteX & finiteY & finiteZ); r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,fini) + flags * 4); @@ -172,11 +175,11 @@ } } - Color4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) + Vector4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) { const bool texldl = state.shaderContainsTexldl; - Color4f v; + Vector4f v; Pointer<Byte> source0 = buffer + index * stride; Pointer<Byte> source1 = source0 + (!texldl ? 
stride : 0); @@ -341,8 +344,8 @@ transpose4x3(v.x, v.y, v.z, v.w); - v.y *= Float4(1.0f / 0x00000400, 1.0f / 0x00000400, 1.0f / 0x00000400, 1.0f / 0x00000400); - v.z *= Float4(1.0f / 0x00100000, 1.0f / 0x00100000, 1.0f / 0x00100000, 1.0f / 0x00100000); + v.y *= Float4(1.0f / 0x00000400); + v.z *= Float4(1.0f / 0x00100000); } break; case STREAMTYPE_DEC3N: @@ -390,9 +393,9 @@ transpose4x3(v.x, v.y, v.z, v.w); - v.x *= Float4(1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f); - v.y *= Float4(1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f); - v.z *= Float4(1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f, 1.0f / 0x00400000 / 511.0f); + v.x *= Float4(1.0f / 0x00400000 / 511.0f); + v.y *= Float4(1.0f / 0x00400000 / 511.0f); + v.z *= Float4(1.0f / 0x00400000 / 511.0f); } break; case STREAMTYPE_FIXED: @@ -472,10 +475,10 @@ ASSERT(false); } - if(stream.count < 1) v.x = Float4(0.0f, 0.0f, 0.0f, 0.0f); - if(stream.count < 2) v.y = Float4(0.0f, 0.0f, 0.0f, 0.0f); - if(stream.count < 3) v.z = Float4(0.0f, 0.0f, 0.0f, 0.0f); - if(stream.count < 4) v.w = Float4(1.0f, 1.0f, 1.0f, 1.0f); + if(stream.count < 1) v.x = Float4(0.0f); + if(stream.count < 2) v.y = Float4(0.0f); + if(stream.count < 3) v.z = Float4(0.0f); + if(stream.count < 4) v.w = Float4(1.0f); return v; } @@ -484,55 +487,53 @@ { int pos = state.positionRegister; - if(state.postTransform && !state.preTransformed) + if(halfIntegerCoordinates) { - Float4 posScale = *Pointer<Float4>(r.data + OFFSET(DrawData,posScale)); // FIXME: Unpack + r.o[pos].x = r.o[pos].x - *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w; + r.o[pos].y = r.o[pos].y - *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w; + } - r.ox[pos] = r.ox[pos] * posScale.x; - r.oy[pos] = r.oy[pos] * posScale.y; - - Float4 posOffset = *Pointer<Float4>(r.data + 
OFFSET(DrawData,posOffset)); // FIXME: Unpack - - r.ox[pos] = r.ox[pos] + r.ow[pos] * posOffset.x; - r.oy[pos] = r.oy[pos] + r.ow[pos] * posOffset.y; + if(symmetricNormalizedDepth) + { + r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f); } } void VertexRoutine::writeCache(Pointer<Byte> &cacheLine, Registers &r) { - Color4f v; + Vector4f v; for(int i = 0; i < 12; i++) { if(state.output[i].write) { - v.x = r.ox[i]; - v.y = r.oy[i]; - v.z = r.oz[i]; - v.w = r.ow[i]; + v.x = r.o[i].x; + v.y = r.o[i].y; + v.z = r.o[i].z; + v.w = r.o[i].w; if(state.output[i].xClamp) { - v.x = Max(v.x, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - v.x = Min(v.x, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + v.x = Max(v.x, Float4(0.0f)); + v.x = Min(v.x, Float4(1.0f)); } if(state.output[i].yClamp) { - v.y = Max(v.y, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - v.y = Min(v.y, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + v.y = Max(v.y, Float4(0.0f)); + v.y = Min(v.y, Float4(1.0f)); } if(state.output[i].zClamp) { - v.z = Max(v.z, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - v.z = Min(v.z, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + v.z = Max(v.z, Float4(0.0f)); + v.z = Min(v.z, Float4(1.0f)); } if(state.output[i].wClamp) { - v.w = Max(v.w, Float4(0.0f, 0.0f, 0.0f, 0.0f)); - v.w = Min(v.w, Float4(1.0f, 1.0f, 1.0f, 1.0f)); + v.w = Max(v.w, Float4(0.0f)); + v.w = Min(v.w, Float4(1.0f)); } if(state.output[i].write == 0x01) @@ -568,16 +569,16 @@ int pos = state.positionRegister; - v.x = r.ox[pos]; - v.y = r.oy[pos]; - v.z = r.oz[pos]; - v.w = r.ow[pos]; + v.x = r.o[pos].x; + v.y = r.o[pos].y; + v.z = r.o[pos].z; + v.w = r.o[pos].w; - Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0, 0, 0, 0))) & As<Int4>(Float4(1, 1, 1, 1)))); + Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); Float4 rhw = Float4(1.0f) / w; - v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,LLLLx16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,WWWWx16)))); - v.y = 
As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,TTTTx16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,HHHHx16)))); + v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)))); + v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)))); v.z = v.z * rhw; v.w = rhw;
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp index 9c21662..72323f2 100644 --- a/src/Shader/VertexRoutine.hpp +++ b/src/Shader/VertexRoutine.hpp
@@ -1,88 +1,105 @@
-// SwiftShader Software Renderer
-//
-// Copyright(c) 2005-2011 TransGaming Inc.
-//
-// All rights reserved. No part of this software may be copied, distributed, transmitted,
-// transcribed, stored in a retrieval system, translated into any human or computer
-// language by any means, or disclosed to third parties without the explicit written
-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
-// or implied, including but not limited to any patent rights, are granted to you.
-//
-
-#ifndef sw_VertexRoutine_hpp
-#define sw_VertexRoutine_hpp
-
-#include "Renderer/Color.hpp"
-#include "Renderer/VertexProcessor.hpp"
-#include "Reactor/Reactor.hpp"
-
-namespace sw
-{
-	class VertexRoutine
-	{
-	protected:
-		struct Registers
-		{
-			Registers() : callStack(4), aL(4), increment(4), iteration(4), enableStack(1 + 24), ox(12), oy(12), oz(12), ow(12)
-			{
-				loopDepth = -1;
-				enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-				enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-			}
-
-			Pointer<Byte> data;
-			Pointer<Byte> constants;
-
-			Array<Float4> ox;
-			Array<Float4> oy;
-			Array<Float4> oz;
-			Array<Float4> ow;
-
-			Int clipFlags;
-
-			Color4f v[16];
-			Color4f r[32];
-			Color4f a0;
-			Array<Int> aL;
-			Color4f p0;
-
-			Array<Int> increment;
-			Array<Int> iteration;
-
-			Int loopDepth;
-			Int stackIndex;   // FIXME: Inc/decrement callStack
-			Array<UInt> callStack;
-
-			Int enableIndex;
-			Array<Int4> enableStack;
-			Int4 enableBreak;
-		};
-
-	public:
-		VertexRoutine(const VertexProcessor::State &state);
-
-		virtual ~VertexRoutine();
-
-		void generate();
-		Routine *getRoutine();
-
-	protected:
-		const VertexProcessor::State &state;
-
-	private:
-		virtual void pipeline(Registers &r) = 0;
-
-		typedef VertexProcessor::State::Input Stream;
-
-		Color4f readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index);
-		void readInput(Registers &r, UInt &index);
-		void computeClipFlags(Registers &r);
-		void postTransform(Registers &r);
-		void writeCache(Pointer<Byte> &cacheLine, Registers &r);
-		void writeVertex(Pointer<Byte> &vertex, Pointer<Byte> &cacheLine);
-
-		Routine *routine;
-	};
-}
-
-#endif   // sw_VertexRoutine_hpp
+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2012 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_VertexRoutine_hpp
+#define sw_VertexRoutine_hpp
+
+#include "Renderer/Color.hpp"
+#include "Renderer/VertexProcessor.hpp"
+#include "ShaderCore.hpp"
+#include "VertexShader.hpp"
+
+namespace sw
+{
+	class VertexRoutine   // Base class for building a vertex Routine; derived classes supply pipeline()
+	{
+	protected:
+		struct Registers   // Register state passed by reference to pipeline() and the helper stages below
+		{
+			Registers(const VertexShader *shader) :   // shader may be null: every access below is guarded by 'shader &&'
+				r(shader && shader->dynamicallyIndexedTemporaries),   // NOTE(review): the bool presumably selects a dynamically indexable layout - confirm in ShaderCore.hpp
+				v(shader && shader->dynamicallyIndexedInput),
+				o(shader && shader->dynamicallyIndexedOutput)
+			{
+				loopDepth = -1;
+				enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Base entry: all bits set in every component
+
+				if(shader && shader->containsBreakInstruction())   // Mask is only given an initial value when the shader contains the instruction
+				{
+					enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+				}
+
+				if(shader && shader->containsContinueInstruction())   // Same pattern as enableBreak
+				{
+					enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+				}
+
+				if(shader && shader->containsLeaveInstruction())   // Same pattern as enableBreak
+				{
+					enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+				}
+			}
+
+			Pointer<Byte> data;        // Assigned in VertexRoutine.cpp right after construction; DrawData fields are read through it
+			Pointer<Byte> constants;   // Loaded from DrawData::constants in VertexRoutine.cpp
+
+			Int clipFlags;   // Accumulated with |= from the clip-plane and finite-position tests in VertexRoutine.cpp
+
+			RegisterArray<16> v;     // Constructed from shader->dynamicallyIndexedInput
+			RegisterArray<4096> r;   // Constructed from shader->dynamicallyIndexedTemporaries
+			RegisterArray<12> o;     // Constructed from shader->dynamicallyIndexedOutput; o[state.positionRegister] is the position
+			Vector4f a0;
+			Array<Int, 4> aL;
+			Vector4f p0;
+
+			Array<Int, 4> increment;
+			Array<Int, 4> iteration;
+
+			Int loopDepth;    // Starts at -1 (set in the constructor)
+			Int stackIndex;   // FIXME: Inc/decrement callStack
+			Array<UInt, 4> callStack;
+
+			Int enableIndex;
+			Array<Int4, 1 + 24> enableStack;   // Entry 0 is set to all ones in the constructor
+			Int4 enableBreak;      // Has no initial value unless the shader contains a break instruction
+			Int4 enableContinue;   // Has no initial value unless the shader contains a continue instruction
+			Int4 enableLeave;      // Has no initial value unless the shader contains a leave instruction
+		};
+
+	public:
+		VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader);   // Stores both arguments; the routine starts out as 0
+
+		virtual ~VertexRoutine();
+
+		void generate();
+		Routine *getRoutine();
+
+	protected:
+		const VertexProcessor::State &state;
+		const VertexShader *const shader;   // Passed to the Registers constructor in VertexRoutine.cpp; Registers tolerates a null pointer
+
+	private:
+		virtual void pipeline(Registers &r) = 0;   // Supplied by the derived class
+
+		typedef VertexProcessor::State::Input Stream;
+
+		Vector4f readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index);   // Components beyond stream.count default to (0, 0, 0, 1)
+		void readInput(Registers &r, UInt &index);
+		void computeClipFlags(Registers &r);
+		void postTransform(Registers &r);   // NOTE(review): presumably the stage applying the halfIntegerCoordinates / symmetricNormalizedDepth adjustments in the .cpp - confirm
+		void writeCache(Pointer<Byte> &cacheLine, Registers &r);   // Visits each output with state.output[i].write set, clamping components to [0, 1] where flagged
+		void writeVertex(Pointer<Byte> &vertex, Pointer<Byte> &cacheLine);
+
+		Routine *routine;   // 0 after construction; assigned from function(...) in VertexRoutine.cpp
+	};
+}
+
+#endif   // sw_VertexRoutine_hpp
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp index c958820..d5b1171 100644 --- a/src/Shader/VertexShader.cpp +++ b/src/Shader/VertexShader.cpp
@@ -1,6 +1,6 @@ // SwiftShader Software Renderer // -// Copyright(c) 2005-2011 TransGaming Inc. +// Copyright(c) 2005-2012 TransGaming Inc. // // All rights reserved. No part of this software may be copied, distributed, transmitted, // transcribed, stored in a retrieval system, translated into any human or computer @@ -16,50 +16,53 @@ namespace sw { - VertexShader::VertexShader(const unsigned long *token) : Shader(token) + VertexShader::VertexShader(const VertexShader *vs) : Shader() + { + version = 0x0300; + positionRegister = Pos; + pointSizeRegister = -1; // No vertex point size + + for(int i = 0; i < 16; i++) + { + input[i] = Semantic(-1, -1); + } + + if(vs) // Make a copy + { + for(int i = 0; i < vs->getLength(); i++) + { + append(new sw::Shader::Instruction(*vs->getInstruction(i))); + } + + memcpy(output, vs->output, sizeof(output)); + memcpy(input, vs->input, sizeof(input)); + positionRegister = vs->positionRegister; + pointSizeRegister = vs->pointSizeRegister; + usedSamplers = vs->usedSamplers; + + analyze(); + } + } + + VertexShader::VertexShader(const unsigned long *token) : Shader() { parse(token); + + positionRegister = Pos; + pointSizeRegister = -1; // No vertex point size + + for(int i = 0; i < 16; i++) + { + input[i] = Semantic(-1, -1); + } + + analyze(); } VertexShader::~VertexShader() { } - void VertexShader::parse(const unsigned long *token) - { - minorVersion = (unsigned char)(token[0] & 0x000000FF); - majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); - shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); - - length = validate(token); - ASSERT(length != 0); - - instruction = new Shader::Instruction*[length]; - - for(int i = 0; i < length; i++) - { - while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token - { - int length = (*token & 0x7FFF0000) >> 16; - - token += length + 1; - } - - int tokenCount = size(*token); - - instruction[i] = new Instruction(token, tokenCount, majorVersion); - - token += 1 + tokenCount; - } - - 
analyzeInput(); - analyzeOutput(); - analyzeDirtyConstants(); - analyzeTexldl(); - analyzeDynamicBranching(); - analyzeSamplers(); - } - int VertexShader::validate(const unsigned long *const token) { if(!token) @@ -89,36 +92,36 @@ } else { - ShaderOpcode opcode = (ShaderOpcode)(token[i] & 0x0000FFFF); + Shader::Opcode opcode = (Shader::Opcode)(token[i] & 0x0000FFFF); switch(opcode) { - case ShaderOperation::OPCODE_TEXCOORD: - case ShaderOperation::OPCODE_TEXKILL: - case ShaderOperation::OPCODE_TEX: - case ShaderOperation::OPCODE_TEXBEM: - case ShaderOperation::OPCODE_TEXBEML: - case ShaderOperation::OPCODE_TEXREG2AR: - case ShaderOperation::OPCODE_TEXREG2GB: - case ShaderOperation::OPCODE_TEXM3X2PAD: - case ShaderOperation::OPCODE_TEXM3X2TEX: - case ShaderOperation::OPCODE_TEXM3X3PAD: - case ShaderOperation::OPCODE_TEXM3X3TEX: - case ShaderOperation::OPCODE_RESERVED0: - case ShaderOperation::OPCODE_TEXM3X3SPEC: - case ShaderOperation::OPCODE_TEXM3X3VSPEC: - case ShaderOperation::OPCODE_TEXREG2RGB: - case ShaderOperation::OPCODE_TEXDP3TEX: - case ShaderOperation::OPCODE_TEXM3X2DEPTH: - case ShaderOperation::OPCODE_TEXDP3: - case ShaderOperation::OPCODE_TEXM3X3: - case ShaderOperation::OPCODE_TEXDEPTH: - case ShaderOperation::OPCODE_CMP: - case ShaderOperation::OPCODE_BEM: - case ShaderOperation::OPCODE_DP2ADD: - case ShaderOperation::OPCODE_DSX: - case ShaderOperation::OPCODE_DSY: - case ShaderOperation::OPCODE_TEXLDD: + case Shader::OPCODE_TEXCOORD: + case Shader::OPCODE_TEXKILL: + case Shader::OPCODE_TEX: + case Shader::OPCODE_TEXBEM: + case Shader::OPCODE_TEXBEML: + case Shader::OPCODE_TEXREG2AR: + case Shader::OPCODE_TEXREG2GB: + case Shader::OPCODE_TEXM3X2PAD: + case Shader::OPCODE_TEXM3X2TEX: + case Shader::OPCODE_TEXM3X3PAD: + case Shader::OPCODE_TEXM3X3TEX: + case Shader::OPCODE_RESERVED0: + case Shader::OPCODE_TEXM3X3SPEC: + case Shader::OPCODE_TEXM3X3VSPEC: + case Shader::OPCODE_TEXREG2RGB: + case Shader::OPCODE_TEXDP3TEX: + case 
Shader::OPCODE_TEXM3X2DEPTH: + case Shader::OPCODE_TEXDP3: + case Shader::OPCODE_TEXM3X3: + case Shader::OPCODE_TEXDEPTH: + case Shader::OPCODE_CMP0: + case Shader::OPCODE_BEM: + case Shader::OPCODE_DP2ADD: + case Shader::OPCODE_DFDX: + case Shader::OPCODE_DFDY: + case Shader::OPCODE_TEXLDD: return 0; // Unsupported operation default: instructionCount++; @@ -137,81 +140,85 @@ return texldl; } + void VertexShader::analyze() + { + analyzeInput(); + analyzeOutput(); + analyzeDirtyConstants(); + analyzeTexldl(); + analyzeDynamicBranching(); + analyzeSamplers(); + analyzeCallSites(); + analyzeDynamicIndexing(); + } + void VertexShader::analyzeInput() { - for(int i = 0; i < 16; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - input[i] = Semantic(-1, -1); - } - - for(int i = 0; i < length; i++) - { - if(instruction[i]->getOpcode() == ShaderOperation::OPCODE_DCL && - instruction[i]->getDestinationParameter().type == ShaderParameter::PARAMETER_INPUT) + if(instruction[i]->opcode == Shader::OPCODE_DCL && + instruction[i]->dst.type == Shader::PARAMETER_INPUT) { - int index = instruction[i]->getDestinationParameter().index; + int index = instruction[i]->dst.index; - input[index] = Semantic(instruction[i]->getUsage(), instruction[i]->getUsageIndex()); + input[index] = Semantic(instruction[i]->usage, instruction[i]->usageIndex); } } } void VertexShader::analyzeOutput() { - positionRegister = Pos; - pointSizeRegister = -1; // No vertex point size - if(version < 0x0300) { - output[Pos][0] = Semantic(ShaderOperation::USAGE_POSITION, 0); - output[Pos][1] = Semantic(ShaderOperation::USAGE_POSITION, 0); - output[Pos][2] = Semantic(ShaderOperation::USAGE_POSITION, 0); - output[Pos][3] = Semantic(ShaderOperation::USAGE_POSITION, 0); + output[Pos][0] = Semantic(Shader::USAGE_POSITION, 0); + output[Pos][1] = Semantic(Shader::USAGE_POSITION, 0); + output[Pos][2] = Semantic(Shader::USAGE_POSITION, 0); + output[Pos][3] = Semantic(Shader::USAGE_POSITION, 0); - for(int i = 0; i < 
length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - const Instruction::DestinationParameter &dst = instruction[i]->getDestinationParameter(); + const DestinationParameter &dst = instruction[i]->dst; switch(dst.type) { - case ShaderParameter::PARAMETER_RASTOUT: + case Shader::PARAMETER_RASTOUT: switch(dst.index) { case 0: // Position already assumed written break; case 1: - output[Fog][0] = Semantic(ShaderOperation::USAGE_FOG, 0); + output[Fog][0] = Semantic(Shader::USAGE_FOG, 0); break; case 2: - output[Pts][1] = Semantic(ShaderOperation::USAGE_PSIZE, 0); + output[Pts][1] = Semantic(Shader::USAGE_PSIZE, 0); pointSizeRegister = Pts; break; default: ASSERT(false); } break; - case ShaderParameter::PARAMETER_ATTROUT: + case Shader::PARAMETER_ATTROUT: if(dst.index == 0) { - if(dst.x) output[D0][0] = Semantic(ShaderOperation::USAGE_COLOR, 0); - if(dst.y) output[D0][1] = Semantic(ShaderOperation::USAGE_COLOR, 0); - if(dst.z) output[D0][2] = Semantic(ShaderOperation::USAGE_COLOR, 0); - if(dst.w) output[D0][3] = Semantic(ShaderOperation::USAGE_COLOR, 0); + if(dst.x) output[D0][0] = Semantic(Shader::USAGE_COLOR, 0); + if(dst.y) output[D0][1] = Semantic(Shader::USAGE_COLOR, 0); + if(dst.z) output[D0][2] = Semantic(Shader::USAGE_COLOR, 0); + if(dst.w) output[D0][3] = Semantic(Shader::USAGE_COLOR, 0); } else if(dst.index == 1) { - if(dst.x) output[D1][0] = Semantic(ShaderOperation::USAGE_COLOR, 1); - if(dst.y) output[D1][1] = Semantic(ShaderOperation::USAGE_COLOR, 1); - if(dst.z) output[D1][2] = Semantic(ShaderOperation::USAGE_COLOR, 1); - if(dst.w) output[D1][3] = Semantic(ShaderOperation::USAGE_COLOR, 1); + if(dst.x) output[D1][0] = Semantic(Shader::USAGE_COLOR, 1); + if(dst.y) output[D1][1] = Semantic(Shader::USAGE_COLOR, 1); + if(dst.z) output[D1][2] = Semantic(Shader::USAGE_COLOR, 1); + if(dst.w) output[D1][3] = Semantic(Shader::USAGE_COLOR, 1); } else ASSERT(false); break; - case ShaderParameter::PARAMETER_TEXCRDOUT: - if(dst.x) output[T0 + 
dst.index][0] = Semantic(ShaderOperation::USAGE_TEXCOORD, dst.index); - if(dst.y) output[T0 + dst.index][1] = Semantic(ShaderOperation::USAGE_TEXCOORD, dst.index); - if(dst.z) output[T0 + dst.index][2] = Semantic(ShaderOperation::USAGE_TEXCOORD, dst.index); - if(dst.w) output[T0 + dst.index][3] = Semantic(ShaderOperation::USAGE_TEXCOORD, dst.index); + case Shader::PARAMETER_TEXCRDOUT: + if(dst.x) output[T0 + dst.index][0] = Semantic(Shader::USAGE_TEXCOORD, dst.index); + if(dst.y) output[T0 + dst.index][1] = Semantic(Shader::USAGE_TEXCOORD, dst.index); + if(dst.z) output[T0 + dst.index][2] = Semantic(Shader::USAGE_TEXCOORD, dst.index); + if(dst.w) output[T0 + dst.index][3] = Semantic(Shader::USAGE_TEXCOORD, dst.index); break; default: break; @@ -220,27 +227,27 @@ } else // Shader Model 3.0 input declaration { - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - if(instruction[i]->getOpcode() == ShaderOperation::OPCODE_DCL && - instruction[i]->getDestinationParameter().type == ShaderParameter::PARAMETER_OUTPUT) + if(instruction[i]->opcode == Shader::OPCODE_DCL && + instruction[i]->dst.type == Shader::PARAMETER_OUTPUT) { - unsigned char usage = instruction[i]->getUsage(); - unsigned char usageIndex = instruction[i]->getUsageIndex(); + unsigned char usage = instruction[i]->usage; + unsigned char usageIndex = instruction[i]->usageIndex; - const Instruction::DestinationParameter &dst = instruction[i]->getDestinationParameter(); + const DestinationParameter &dst = instruction[i]->dst; if(dst.x) output[dst.index][0] = Semantic(usage, usageIndex); if(dst.y) output[dst.index][1] = Semantic(usage, usageIndex); if(dst.z) output[dst.index][2] = Semantic(usage, usageIndex); if(dst.w) output[dst.index][3] = Semantic(usage, usageIndex); - if(usage == ShaderOperation::USAGE_POSITION && usageIndex == 0) + if(usage == Shader::USAGE_POSITION && usageIndex == 0) { positionRegister = dst.index; } - if(usage == ShaderOperation::USAGE_PSIZE && 
usageIndex == 0) + if(usage == Shader::USAGE_PSIZE && usageIndex == 0) { pointSizeRegister = dst.index; } @@ -253,9 +260,9 @@ { texldl = false; - for(int i = 0; i < length; i++) + for(unsigned int i = 0; i < instruction.size(); i++) { - if(instruction[i]->getOpcode() == Instruction::Operation::OPCODE_TEXLDL) + if(instruction[i]->opcode == Shader::OPCODE_TEXLDL) { texldl = true;
diff --git a/src/Shader/VertexShader.hpp b/src/Shader/VertexShader.hpp index 1200b20..3bfc0f4 100644 --- a/src/Shader/VertexShader.hpp +++ b/src/Shader/VertexShader.hpp
@@ -1,48 +1,47 @@
-// SwiftShader Software Renderer
-//
-// Copyright(c) 2005-2011 TransGaming Inc.
-//
-// All rights reserved. No part of this software may be copied, distributed, transmitted,
-// transcribed, stored in a retrieval system, translated into any human or computer
-// language by any means, or disclosed to third parties without the explicit written
-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
-// or implied, including but not limited to any patent rights, are granted to you.
-//
-
-#ifndef sw_VertexShader_hpp
-#define sw_VertexShader_hpp
-
-#include "Shader.hpp"
-
-namespace sw
-{
-	class VertexShader : public Shader
-	{
-	public:
-		VertexShader(const unsigned long *token);
-
-		virtual ~VertexShader();
-
-		static int validate(const unsigned long *const token);   // Returns number of instructions if valid
-		bool containsTexldl() const;
-
-		int positionRegister;     // FIXME: Private
-		int pointSizeRegister;    // FIXME: Private
-
-		Semantic input[16];       // FIXME: Private
-		Semantic output[12][4];   // FIXME: Private
-
-	private:
-		void parse(const unsigned long *token);
-
-		void analyzeInput();
-		void analyzeOutput();
-		void analyzeTexldl();
-
-		bool texldl;
-	};
-
-	typedef VertexShader::Instruction VertexShaderInstruction;
-}
-
-#endif   // sw_VertexShader_hpp
+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2012 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_VertexShader_hpp
+#define sw_VertexShader_hpp
+
+#include "Shader.hpp"
+
+namespace sw
+{
+	class VertexShader : public Shader   // Shader plus per-register input/output semantics and the position / point size register indices
+	{
+	public:
+		explicit VertexShader(const VertexShader *vs = 0);   // vs == 0: empty shader with default registers; otherwise copies instructions, semantics, registers and samplers, then analyze(). NOTE(review): version is set to 0x0300 and vs->version is not copied - confirm intended
+		explicit VertexShader(const unsigned long *token);   // Parses the token stream, resets registers and input semantics to defaults, then runs analyze()
+
+		virtual ~VertexShader();
+
+		static int validate(const unsigned long *const token);   // Returns number of instructions if valid; 0 when an unsupported operation is found
+		bool containsTexldl() const;   // True when analyzeTexldl() found a TEXLDL instruction
+
+		virtual void analyze();   // Runs the analysis passes: input, output, dirty constants, texldl, dynamic branching, samplers, call sites, dynamic indexing
+
+		int positionRegister;     // FIXME: Private. Output register holding the position; defaults to Pos
+		int pointSizeRegister;    // FIXME: Private. Output register holding the point size; -1 when there is none
+
+		Semantic input[16];       // FIXME: Private. Semantic per input register; Semantic(-1, -1) unless a DCL declares it
+		Semantic output[12][4];   // FIXME: Private. Semantic per output register and component (x, y, z, w)
+
+	private:
+		void analyzeInput();    // Fills input[] from DCL instructions whose destination is an input register
+		void analyzeOutput();   // Fills output[] and the two register indices: implicit semantics below version 3.0, DCL declarations otherwise
+		void analyzeTexldl();   // Recomputes texldl
+
+		bool texldl;   // Set by analyzeTexldl()
+	};
+}
+
+#endif   // sw_VertexShader_hpp