Splitting PixelRoutine into PixelProgram and PixelPipeline

This CL splits PixelRoutine into two specialized classes:
PixelProgram and PixelPipeline.
In this CL:
- Moved all specialized behavior of PixelRoutine into the
  PixelProgram and PixelPipeline classes.
- Inverted hierarchical dependency between PixelRoutine and
  QuadRasterizer. QuadRasterizer is now the base class.
- Added a check to PixelProcessor::routine() to create either
  a PixelPipeline object (when pixelShaderVersion() <= 0x0104)
  or a PixelProgram object (otherwise).
- Moved a few interpolation related utility functions from
  PixelRoutine down to QuadRasterizer.
- Added Registers hierarchy. PixelProgram specific Registers
  and PixelPipeline specific Registers are now mutually
  exclusive.
- Made the quad functions virtual
- Added a few virtual functions (setBuiltins, ps, alphaTest,
  rasterOperation) for Program/Pipeline specific implementations

Bug 20257503

Change-Id: I6abe536a5521d9842f757a8bbb52e3947e3c9250
Reviewed-on: https://swiftshader-review.googlesource.com/3634
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Android.mk b/src/Android.mk
index 248fd28..3372d78 100644
--- a/src/Android.mk
+++ b/src/Android.mk
@@ -55,6 +55,8 @@
 
 LOCAL_SRC_FILES += \
 	Shader/Constants.cpp \
+	Shader/PixelPipeline.cpp \
+	Shader/PixelProgram.cpp \
 	Shader/PixelRoutine.cpp \
 	Shader/PixelShader.cpp \
 	Shader/SamplerCore.cpp \
diff --git a/src/OpenGL/libGLES_CM/libGLES_CM.cbp b/src/OpenGL/libGLES_CM/libGLES_CM.cbp
index 511f127..905a884 100644
--- a/src/OpenGL/libGLES_CM/libGLES_CM.cbp
+++ b/src/OpenGL/libGLES_CM/libGLES_CM.cbp
@@ -233,6 +233,10 @@
 		<Unit filename="../../Renderer/VertexProcessor.hpp" />
 		<Unit filename="../../Shader/Constants.cpp" />
 		<Unit filename="../../Shader/Constants.hpp" />
+		<Unit filename="../../Shader/PixelPipeline.cpp" />
+		<Unit filename="../../Shader/PixelPipeline.hpp" />
+		<Unit filename="../../Shader/PixelProgram.cpp" />
+		<Unit filename="../../Shader/PixelProgram.hpp" />
 		<Unit filename="../../Shader/PixelRoutine.cpp" />
 		<Unit filename="../../Shader/PixelRoutine.hpp" />
 		<Unit filename="../../Shader/PixelShader.cpp" />
diff --git a/src/OpenGL/libGLESv2/libGLESv2.cbp b/src/OpenGL/libGLESv2/libGLESv2.cbp
index b797561..cd3d3d1 100644
--- a/src/OpenGL/libGLESv2/libGLESv2.cbp
+++ b/src/OpenGL/libGLESv2/libGLESv2.cbp
@@ -232,6 +232,10 @@
 		<Unit filename="../../Renderer/VertexProcessor.hpp" />
 		<Unit filename="../../Shader/Constants.cpp" />
 		<Unit filename="../../Shader/Constants.hpp" />
+		<Unit filename="../../Shader/PixelPipeline.cpp" />
+		<Unit filename="../../Shader/PixelPipeline.hpp" />
+		<Unit filename="../../Shader/PixelProgram.cpp" />
+		<Unit filename="../../Shader/PixelProgram.hpp" />
 		<Unit filename="../../Shader/PixelRoutine.cpp" />
 		<Unit filename="../../Shader/PixelRoutine.hpp" />
 		<Unit filename="../../Shader/PixelShader.cpp" />
diff --git a/src/Renderer/PixelProcessor.cpp b/src/Renderer/PixelProcessor.cpp
index 9f2b4ff..e21cbee 100644
--- a/src/Renderer/PixelProcessor.cpp
+++ b/src/Renderer/PixelProcessor.cpp
@@ -11,7 +11,8 @@
 
 #include "PixelProcessor.hpp"
 
-#include "QuadRasterizer.hpp"
+#include "PixelPipeline.hpp"
+#include "PixelProgram.hpp"
 #include "PixelShader.hpp"
 #include "MetaMacro.hpp"
 #include "Surface.hpp"
@@ -1057,7 +1058,16 @@
 
 		if(!routine)
 		{
-			Rasterizer *generator = new QuadRasterizer(state, context->pixelShader);
+			const bool integerPipeline = (context->pixelShaderVersion() <= 0x0104);
+			Rasterizer *generator = nullptr;
+			if(integerPipeline)
+			{
+				generator = new PixelPipeline(state, context->pixelShader);
+			}
+			else
+			{
+				generator = new PixelProgram(state, context->pixelShader);
+			}
 			generator->generate();
 			routine = generator->getRoutine();
 			delete generator;
diff --git a/src/Renderer/QuadRasterizer.cpp b/src/Renderer/QuadRasterizer.cpp
index 543c0ce..a9298d4 100644
--- a/src/Renderer/QuadRasterizer.cpp
+++ b/src/Renderer/QuadRasterizer.cpp
@@ -21,10 +21,23 @@
 {
 	extern bool veryEarlyDepthTest;
 	extern bool complementaryDepthBuffer;
+	extern bool fullPixelPositionRegister;
 
 	extern int clusterCount;
 
-	QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : PixelRoutine(state, pixelShader)
+	QuadRasterizer::Registers::Registers()   // Resets occlusion (and, under PERF_PROFILE, every cycles[] entry) to zero
+	{
+		occlusion = 0;
+
+#if PERF_PROFILE
+		for(int i = 0; i < PERF_TIMERS; i++)   // One entry per profiling timer
+		{
+			cycles[i] = 0;
+		}
+#endif
+	}
+
+	QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : Rasterizer(state), shader(pixelShader)
 	{
 	}
 
@@ -45,7 +58,7 @@
 			Int cluster(function.arg(2));
 			Pointer<Byte> data(function.arg(3));
 
-			Registers r(shader);
+			Registers& r = *createRegisters(shader);
 			r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
 			r.cluster = cluster;
 			r.data = data;
@@ -89,6 +102,8 @@
 			#endif
 
 			Return();
+
+			delete &r;
 		}
 
 		routine = function(L"PixelRoutine_%0.8X", state.shaderID);
@@ -317,4 +332,31 @@
 		}
 		Until(y >= yMax)
 	}
+
+	Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)   // Returns D when flat; otherwise D + x * A, additionally scaled by rhw when perspective is set
+	{
+		Float4 interpolant = D;   // Flat interpolants are returned unchanged
+
+		if(!flat)
+		{
+			interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16);   // A is read from the PlaneEquation at planeEquation; NOTE(review): the 16 is presumably the load alignment - confirm
+
+			if(perspective)
+			{
+				interpolant *= rhw;   // Perspective scaling is only applied to non-flat interpolants
+			}
+		}
+
+		return interpolant;
+	}
+
+	bool QuadRasterizer::interpolateZ() const   // True when the depth test or pixel fog is active, or when a shader declares vPos while fullPixelPositionRegister is set
+	{
+		return state.depthTestActive || state.pixelFogActive() || (shader && shader->vPosDeclared && fullPixelPositionRegister);   // shader may be null, hence the guard
+	}
+
+	bool QuadRasterizer::interpolateW() const   // True when state.perspective is set, or when a shader declares vPos while fullPixelPositionRegister is set
+	{
+		return state.perspective || (shader && shader->vPosDeclared && fullPixelPositionRegister);   // shader may be null, hence the guard
+	}
 }
diff --git a/src/Renderer/QuadRasterizer.hpp b/src/Renderer/QuadRasterizer.hpp
index 36184cc..c1b05c0 100644
--- a/src/Renderer/QuadRasterizer.hpp
+++ b/src/Renderer/QuadRasterizer.hpp
@@ -13,17 +13,51 @@
 #define sw_QuadRasterizer_hpp
 
 #include "Rasterizer.hpp"
-#include "PixelRoutine.hpp"
+#include "ShaderCore.hpp"
+#include "PixelShader.hpp"
+
+#include "Types.hpp"
 
 namespace sw
 {
-	class QuadRasterizer : public PixelRoutine
+	class QuadRasterizer : public Rasterizer
 	{
-	public:
-		QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader);
+	protected:
+		QuadRasterizer(const PixelProcessor::State &state, const PixelShader *shader);
 
 		virtual ~QuadRasterizer();
 
+		struct Registers   // Base register set; the concrete object is obtained through the pure virtual createRegisters()
+		{
+			Registers();   // Zeroes occlusion and, under PERF_PROFILE, cycles[]
+
+			Pointer<Byte> constants;   // Loaded from DrawData::constants in generate()
+
+			Pointer<Byte> primitive;
+			Int cluster;   // Set in generate() from the routine's cluster argument
+			Pointer<Byte> data;   // Set in generate() from the routine's data argument; used as the base for OFFSET(DrawData, ...) reads
+
+			Float4 Dz[4];   // NOTE(review): Dz/Dw/Dv/Df are presumably the interpolation bases for depth, w, varyings and fog - confirm
+			Float4 Dw;
+			Float4 Dv[10][4];
+			Float4 Df;
+
+			UInt occlusion;   // Starts at zero (see constructor)
+
+#if PERF_PROFILE
+			Long cycles[PERF_TIMERS];   // Starts at zero (see constructor); only present in PERF_PROFILE builds
+#endif
+		};
+
+		virtual void quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) = 0;
+		virtual Registers* createRegisters(const PixelShader *shader) = 0;
+
+		bool interpolateZ() const;
+		bool interpolateW() const;
+		Float4 interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
+
+		const PixelShader *const shader;
+
 	private:
 		void generate();
 
diff --git a/src/Shader/PixelPipeline.cpp b/src/Shader/PixelPipeline.cpp
new file mode 100644
index 0000000..8a38bed
--- /dev/null
+++ b/src/Shader/PixelPipeline.cpp
@@ -0,0 +1,1968 @@
+// SwiftShader Software Renderer

+//

+// Copyright(c) 2015 Google Inc.

+//

+// All rights reserved. No part of this software may be copied, distributed, transmitted,

+// transcribed, stored in a retrieval system, translated into any human or computer

+// language by any means, or disclosed to third parties without the explicit written

+// agreement of Google Inc. Without such an agreement, no rights or licenses, express

+// or implied, including but not limited to any patent rights, are granted to you.

+//

+

+#include "PixelPipeline.hpp"

+#include "Renderer.hpp"

+#include "SamplerCore.hpp"

+

+namespace sw

+{

+	extern bool postBlendSRGB;

+

+	void PixelPipeline::setBuiltins(PixelRoutine::Registers &rBase, Int &x, Int &y, Float4(&z)[4], Float4 &w)   // Initializes r.diffuse and r.specular from the interpolants vf[0] and vf[1]; x, y, z and w are not used by this implementation

+	{

+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast to the PixelPipeline-specific register set

+

+		if(state.color[0].component & 0x1) r.diffuse.x = convertFixed12(r.vf[0].x); else r.diffuse.x = Short4(0x1000);   // Diffuse: each component whose mask bit is clear defaults to 0x1000
+		if(state.color[0].component & 0x2) r.diffuse.y = convertFixed12(r.vf[0].y); else r.diffuse.y = Short4(0x1000);
+		if(state.color[0].component & 0x4) r.diffuse.z = convertFixed12(r.vf[0].z); else r.diffuse.z = Short4(0x1000);
+		if(state.color[0].component & 0x8) r.diffuse.w = convertFixed12(r.vf[0].w); else r.diffuse.w = Short4(0x1000);
+
+		if(state.color[1].component & 0x1) r.specular.x = convertFixed12(r.vf[1].x); else r.specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);   // Specular: each component whose mask bit is clear defaults to zero
+		if(state.color[1].component & 0x2) r.specular.y = convertFixed12(r.vf[1].y); else r.specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
+		if(state.color[1].component & 0x4) r.specular.z = convertFixed12(r.vf[1].z); else r.specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
+		if(state.color[1].component & 0x8) r.specular.w = convertFixed12(r.vf[1].w); else r.specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);

+	}

+

+	void PixelPipeline::fixedFunction(Registers& r)   // Runs the enabled texture stages in order on r.current, then calls specularPixel() with the specular color
+	{

+		r.current = r.diffuse;   // The first stage starts from the diffuse color
+		Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);   // Zero-initialized temp register, handed to every stage's blendTexture() call
+
+		for(int stage = 0; stage < 8; stage++)
+		{
+			if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
+			{
+				break;   // The first disabled stage ends the cascade; later stages are ignored
+			}
+
+			Vector4s texture;   // NOTE(review): not assigned here when the stage has no texture, yet still passed to blendTexture() - confirm it is not read in that case
+
+			if(state.textureStage[stage].usesTexture)
+			{
+				sampleTexture(r, texture, stage, stage);   // The stage index is passed for both trailing arguments
+			}
+
+			blendTexture(r, temp, texture, stage);
+		}
+
+		specularPixel(r.current, r.specular);

+	}

+

+	void PixelPipeline::applyShader(PixelRoutine::Registers &rBase, Int cMask[4])   // Walks the pixel shader's instructions and emits each one on Vector4s registers; falls back to fixedFunction() when no shader is bound
+	{
+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast to the PixelPipeline-specific register set
+
+		if(!shader)   // No pixel shader: use the fixed-function texture stages instead
+		{
+			fixedFunction(r);
+			return;
+		}
+
+		int pad = 0;        // Count number of texm3x3pad instructions
+		Vector4s dPairing;   // Destination for first pairing instruction
+
+		for(size_t i = 0; i < shader->getLength(); i++)
+		{
+			const Shader::Instruction *instruction = shader->getInstruction(i);
+			Shader::Opcode opcode = instruction->opcode;
+
+			//	#ifndef NDEBUG   // FIXME: Centralize debug output control
+			//		shader->printInstruction(i, "debug.txt");
+			//	#endif
+
+			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)   // Declarations and constant definitions are skipped entirely
+			{
+				continue;
+			}
+
+			const Dst &dst = instruction->dst;
+			const Src &src0 = instruction->src[0];
+			const Src &src1 = instruction->src[1];
+			const Src &src2 = instruction->src[2];
+
+			unsigned short version = shader->getVersion();
+			bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue;   // First instruction of pair
+			bool coissue = instruction->coissue;                                                              // Second instruction of pair
+
+			Vector4s d;
+			Vector4s s0;
+			Vector4s s1;
+			Vector4s s2;
+
+			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterS(r, src0);   // Only fetch the source operands the instruction actually has
+			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterS(r, src1);
+			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterS(r, src2);
+
+			Float4 u = version < 0x0104 ? r.vf[2 + dst.index].x : r.vf[2 + src0.index].x;   // Interpolant vf[2 + n]: n is dst.index below version 0x0104, src0.index otherwise
+			Float4 v = version < 0x0104 ? r.vf[2 + dst.index].y : r.vf[2 + src0.index].y;
+			Float4 s = version < 0x0104 ? r.vf[2 + dst.index].z : r.vf[2 + src0.index].z;
+			Float4 t = version < 0x0104 ? r.vf[2 + dst.index].w : r.vf[2 + src0.index].w;
+
+			switch(opcode)
+			{
+			case Shader::OPCODE_PS_1_0: break;
+			case Shader::OPCODE_PS_1_1: break;
+			case Shader::OPCODE_PS_1_2: break;
+			case Shader::OPCODE_PS_1_3: break;
+			case Shader::OPCODE_PS_1_4: break;
+
+			case Shader::OPCODE_DEF:    break;   // Not reached: DEF is already skipped by the continue above
+
+			case Shader::OPCODE_NOP:    break;
+			case Shader::OPCODE_MOV: MOV(d, s0);         break;
+			case Shader::OPCODE_ADD: ADD(d, s0, s1);     break;
+			case Shader::OPCODE_SUB: SUB(d, s0, s1);     break;
+			case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
+			case Shader::OPCODE_MUL: MUL(d, s0, s1);     break;
+			case Shader::OPCODE_DP3: DP3(d, s0, s1);     break;
+			case Shader::OPCODE_DP4: DP4(d, s0, s1);     break;
+			case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
+			case Shader::OPCODE_TEXCOORD:
+				if(version < 0x0104)
+				{
+					TEXCOORD(d, u, v, s, dst.index);
+				}
+				else
+				{
+					if((src0.swizzle & 0x30) == 0x20)   // .xyz
+					{
+						TEXCRD(d, u, v, s, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
+					}
+					else   // .xyw
+					{
+						TEXCRD(d, u, v, t, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
+					}
+				}
+				break;
+			case Shader::OPCODE_TEXKILL:
+				if(version < 0x0104)
+				{
+					TEXKILL(cMask, u, v, s);
+				}
+				else if(version == 0x0104)
+				{
+					if(dst.type == Shader::PARAMETER_TEXTURE)
+					{
+						TEXKILL(cMask, u, v, s);
+					}
+					else
+					{
+						TEXKILL(cMask, r.rs[dst.index]);   // Non-texture operand: the kill test uses register rs[dst.index]
+					}
+				}
+				else ASSERT(false);   // Versions above 0x0104 are not expected in this class
+				break;
+			case Shader::OPCODE_TEX:
+				if(version < 0x0104)
+				{
+					TEX(r, d, u, v, s, dst.index, false);
+				}
+				else if(version == 0x0104)
+				{
+					if(src0.type == Shader::PARAMETER_TEXTURE)
+					{
+						if((src0.swizzle & 0x30) == 0x20)   // .xyz
+						{
+							TEX(r, d, u, v, s, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
+						}
+						else   // .xyw
+						{
+							TEX(r, d, u, v, t, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
+						}
+					}
+					else
+					{
+						TEXLD(r, d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
+					}
+				}
+				else ASSERT(false);   // Versions above 0x0104 are not expected in this class
+				break;
+			case Shader::OPCODE_TEXBEM:       TEXBEM(r, d, s0, u, v, s, dst.index);                                             break;
+			case Shader::OPCODE_TEXBEML:      TEXBEML(r, d, s0, u, v, s, dst.index);                                            break;
+			case Shader::OPCODE_TEXREG2AR:    TEXREG2AR(r, d, s0, dst.index);                                                   break;
+			case Shader::OPCODE_TEXREG2GB:    TEXREG2GB(r, d, s0, dst.index);                                                   break;
+			case Shader::OPCODE_TEXM3X2PAD:   TEXM3X2PAD(r, u, v, s, s0, 0, src0.modifier == Shader::MODIFIER_SIGN);            break;
+			case Shader::OPCODE_TEXM3X2TEX:   TEXM3X2TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
+			case Shader::OPCODE_TEXM3X3PAD:   TEXM3X3PAD(r, u, v, s, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN);    break;
+			case Shader::OPCODE_TEXM3X3TEX:   TEXM3X3TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
+			case Shader::OPCODE_TEXM3X3SPEC:  TEXM3X3SPEC(r, d, u, v, s, dst.index, s0, s1);                                    break;
+			case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(r, d, u, v, s, dst.index, s0);                                       break;
+			case Shader::OPCODE_CND:          CND(d, s0, s1, s2);                                                               break;
+			case Shader::OPCODE_TEXREG2RGB:   TEXREG2RGB(r, d, s0, dst.index);                                                  break;
+			case Shader::OPCODE_TEXDP3TEX:    TEXDP3TEX(r, d, u, v, s, dst.index, s0);                                          break;
+			case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN);          break;
+			case Shader::OPCODE_TEXDP3:       TEXDP3(r, d, u, v, s, s0);                                                        break;
+			case Shader::OPCODE_TEXM3X3:      TEXM3X3(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN);               break;
+			case Shader::OPCODE_TEXDEPTH:     TEXDEPTH(r);                                                                      break;
+			case Shader::OPCODE_CMP0:         CMP(d, s0, s1, s2);                                                               break;
+			case Shader::OPCODE_BEM:          BEM(r, d, s0, s1, dst.index);                                                     break;
+			case Shader::OPCODE_PHASE:                                                                                          break;
+			case Shader::OPCODE_END:                                                                                            break;
+			default:
+				ASSERT(false);
+			}
+
+			if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)   // TEXKILL has a dst operand but its result is never written back
+			{
+				if(dst.shift > 0)   // Positive shift: one saturating doubling per shift step, at most three (x2, x4, x8)
+				{
+					if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
+					if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
+					if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
+					if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
+				}
+				else if(dst.shift < 0)   // Negative shift: right-shift the masked components by -dst.shift bits
+				{
+					if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
+					if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
+					if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
+					if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
+				}
+
+				if(dst.saturate)   // Saturate modifier: clamp the masked components to [0x0000, 0x1000]
+				{
+					if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
+					if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
+					if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
+					if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
+				}
+
+				if(pairing)   // First of a co-issued pair: hold the result in dPairing instead of writing it now
+				{
+					if(dst.mask & 0x1) dPairing.x = d.x;
+					if(dst.mask & 0x2) dPairing.y = d.y;
+					if(dst.mask & 0x4) dPairing.z = d.z;
+					if(dst.mask & 0x8) dPairing.w = d.w;
+				}
+
+				if(coissue)   // Second of the pair: first commit the held result of the previous instruction
+				{
+					const Dst &dst = shader->getInstruction(i - 1)->dst;   // Shadows the outer dst: this is the previous instruction's destination
+
+					writeDestination(r, dPairing, dst);
+				}
+
+				if(!pairing)   // Write this instruction's own result unless it is being held for its pair partner
+				{
+					writeDestination(r, d, dst);
+				}
+			}
+		}
+	}

+

+	Bool PixelPipeline::alphaTest(PixelRoutine::Registers &rBase, Int cMask[4])   // Clamps r.current, applies the alpha test or alpha-to-coverage to cMask, and returns whether any sample mask is still non-zero (always true when alpha testing is inactive)

+	{

+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast to the PixelPipeline-specific register set

+

+		r.current.x = Min(r.current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.x = Max(r.current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));   // Clamp every channel of current to [0x0000, 0x0FFF]; done even when alpha testing is off
+		r.current.y = Min(r.current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.y = Max(r.current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+		r.current.z = Min(r.current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.z = Max(r.current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+		r.current.w = Min(r.current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.w = Max(r.current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+
+		if(!state.alphaTestActive())
+		{
+			return true;   // Nothing to test: cMask is left untouched
+		}
+
+		Int aMask;
+
+		if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
+		{
+			PixelRoutine::alphaTest(r, aMask, r.current.w);   // Base-class overload fills aMask from the alpha channel
+
+			for(unsigned int q = 0; q < state.multiSample; q++)
+			{
+				cMask[q] &= aMask;   // The same alpha mask is applied to every sample
+			}
+		}
+		else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
+		{
+			Float4 alpha = Float4(r.current.w) * Float4(1.0f / 0x1000);   // Convert alpha to float, scaling so that 0x1000 maps to 1.0
+
+			alphaToCoverage(r, cMask, alpha);
+		}
+		else ASSERT(false);
+
+		Int pass = cMask[0];
+
+		for(unsigned int q = 1; q < state.multiSample; q++)   // OR the remaining samples' masks together
+		{
+			pass = pass | cMask[q];
+		}
+
+		return pass != 0x0;   // True when at least one sample mask is non-zero

+	}

+

+	void PixelPipeline::rasterOperation(PixelRoutine::Registers &rBase, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])   // Fog-blends r.current and, per enabled sample, blends and writes it to render target 0 (integer or float path chosen by target format)

+	{

+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast to the PixelPipeline-specific register set

+

+		if(!state.colorWriteActive(0))   // Only render target 0 is handled here
+		{
+			return;
+		}
+
+		Vector4f oC;   // Float copy of the color, used only by the floating-point formats
+
+		switch(state.targetFormat[0])
+		{
+		case FORMAT_R5G6B5:
+		case FORMAT_X8R8G8B8:
+		case FORMAT_X8B8G8R8:
+		case FORMAT_A8R8G8B8:
+		case FORMAT_A8B8G8R8:
+		case FORMAT_A8:
+		case FORMAT_G16R16:
+		case FORMAT_A16B16G16R16:
+			if(!postBlendSRGB && state.writeSRGB)   // sRGB conversion happens here only when it is not deferred until after blending
+			{
+				linearToSRGB12_16(r, r.current);
+			}
+			else
+			{
+				r.current.x <<= 4;   // Otherwise just shift each channel left by 4 bits
+				r.current.y <<= 4;
+				r.current.z <<= 4;
+				r.current.w <<= 4;
+			}
+
+			if(state.targetFormat[0] == FORMAT_R5G6B5)
+			{
+				r.current.x &= Short4(0xF800u);   // Keep the top 5 bits of x
+				r.current.y &= Short4(0xFC00u);   // Keep the top 6 bits of y
+				r.current.z &= Short4(0xF800u);   // Keep the top 5 bits of z
+			}
+
+			fogBlend(r, r.current, fog, r.z[0], r.rhw);
+
+			for(unsigned int q = 0; q < state.multiSample; q++)
+			{
+				Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(r.data + OFFSET(DrawData, colorSliceB[0]));   // Sample q's buffer is offset by q * colorSliceB[0] from the target's base pointer
+				Vector4s color = r.current;   // Per-sample copy, so blending one sample does not alter the color used for the next
+
+				if(state.multiSampleMask & (1 << q))   // Skip samples disabled in the multisample mask
+				{
+					alphaBlend(r, 0, buffer, color, x);
+					logicOperation(r, 0, buffer, color, x);
+					writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+				}
+			}
+			break;
+		case FORMAT_R32F:
+		case FORMAT_G32R32F:
+		case FORMAT_A32B32G32R32F:
+			convertSigned12(oC, r.current);   // Convert the Vector4s color into the float vector oC
+			PixelRoutine::fogBlend(r, oC, fog, r.z[0], r.rhw);   // Base-class fogBlend overload taking the float color
+
+			for(unsigned int q = 0; q < state.multiSample; q++)
+			{
+				Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(r.data + OFFSET(DrawData, colorSliceB[0]));   // Sample q's buffer is offset by q * colorSliceB[0] from the target's base pointer
+				Vector4f color = oC;   // Per-sample copy, as in the integer path
+
+				if(state.multiSampleMask & (1 << q))   // Skip samples disabled in the multisample mask
+				{
+					alphaBlend(r, 0, buffer, color, x);   // Unlike the integer path, no logicOperation() call is made here
+					writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+				}
+			}
+			break;
+		default:
+			ASSERT(false);   // Any other target format is unexpected here
+		}

+	}

+

+	void PixelPipeline::blendTexture(Registers &r, Vector4s &temp, Vector4s &texture, int stage)
+	{
+		Vector4s *arg1;
+		Vector4s *arg2;
+		Vector4s *arg3;
+		Vector4s res;
+
+		Vector4s constant;
+		Vector4s tfactor;
+
+		const TextureStage::State &textureStage = state.textureStage[stage];
+
+		if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
+		   textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
+		   textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
+		   textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
+		   textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
+		   textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
+		{
+			constant.x = *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
+			constant.y = *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
+			constant.z = *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
+			constant.w = *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
+		}
+
+		if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
+		   textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
+		   textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
+		   textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
+		   textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
+		   textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
+		{
+			tfactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[0]));
+			tfactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[1]));
+			tfactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[2]));
+			tfactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[3]));
+		}
+
+		// Premodulate
+		if(stage > 0 && textureStage.usesTexture)
+		{
+			if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
+			{
+				r.current.x = MulHigh(r.current.x, texture.x) << 4;
+				r.current.y = MulHigh(r.current.y, texture.y) << 4;
+				r.current.z = MulHigh(r.current.z, texture.z) << 4;
+			}
+
+			if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
+			{
+				r.current.w = MulHigh(r.current.w, texture.w) << 4;
+			}
+		}
+
+		if(luminance)
+		{
+			texture.x = MulHigh(texture.x, r.L) << 4;
+			texture.y = MulHigh(texture.y, r.L) << 4;
+			texture.z = MulHigh(texture.z, r.L) << 4;
+
+			luminance = false;
+		}
+
+		switch(textureStage.firstArgument)
+		{
+		case TextureStage::SOURCE_TEXTURE:	arg1 = &texture;    break;
+		case TextureStage::SOURCE_CONSTANT:	arg1 = &constant;   break;
+		case TextureStage::SOURCE_CURRENT:	arg1 = &r.current;  break;
+		case TextureStage::SOURCE_DIFFUSE:	arg1 = &r.diffuse;  break;
+		case TextureStage::SOURCE_SPECULAR:	arg1 = &r.specular; break;
+		case TextureStage::SOURCE_TEMP:		arg1 = &temp;       break;
+		case TextureStage::SOURCE_TFACTOR:	arg1 = &tfactor;    break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.secondArgument)
+		{
+		case TextureStage::SOURCE_TEXTURE:	arg2 = &texture;    break;
+		case TextureStage::SOURCE_CONSTANT:	arg2 = &constant;   break;
+		case TextureStage::SOURCE_CURRENT:	arg2 = &r.current;  break;
+		case TextureStage::SOURCE_DIFFUSE:	arg2 = &r.diffuse;  break;
+		case TextureStage::SOURCE_SPECULAR:	arg2 = &r.specular; break;
+		case TextureStage::SOURCE_TEMP:		arg2 = &temp;       break;
+		case TextureStage::SOURCE_TFACTOR:	arg2 = &tfactor;    break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.thirdArgument)
+		{
+		case TextureStage::SOURCE_TEXTURE:	arg3 = &texture;    break;
+		case TextureStage::SOURCE_CONSTANT:	arg3 = &constant;   break;
+		case TextureStage::SOURCE_CURRENT:	arg3 = &r.current;  break;
+		case TextureStage::SOURCE_DIFFUSE:	arg3 = &r.diffuse;  break;
+		case TextureStage::SOURCE_SPECULAR:	arg3 = &r.specular; break;
+		case TextureStage::SOURCE_TEMP:		arg3 = &temp;       break;
+		case TextureStage::SOURCE_TFACTOR:	arg3 = &tfactor;    break;
+		default:
+			ASSERT(false);
+		}
+
+		Vector4s mod1;
+		Vector4s mod2;
+		Vector4s mod3;
+
+		switch(textureStage.firstModifier)
+		{
+		case TextureStage::MODIFIER_COLOR:
+			break;
+		case TextureStage::MODIFIER_INVCOLOR:
+			mod1.x = SubSat(Short4(0x1000), arg1->x);
+			mod1.y = SubSat(Short4(0x1000), arg1->y);
+			mod1.z = SubSat(Short4(0x1000), arg1->z);
+			mod1.w = SubSat(Short4(0x1000), arg1->w);
+
+			arg1 = &mod1;
+			break;
+		case TextureStage::MODIFIER_ALPHA:
+			mod1.x = arg1->w;
+			mod1.y = arg1->w;
+			mod1.z = arg1->w;
+			mod1.w = arg1->w;
+
+			arg1 = &mod1;
+			break;
+		case TextureStage::MODIFIER_INVALPHA:
+			mod1.x = SubSat(Short4(0x1000), arg1->w);
+			mod1.y = SubSat(Short4(0x1000), arg1->w);
+			mod1.z = SubSat(Short4(0x1000), arg1->w);
+			mod1.w = SubSat(Short4(0x1000), arg1->w);
+
+			arg1 = &mod1;
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.secondModifier)
+		{
+		case TextureStage::MODIFIER_COLOR:
+			break;
+		case TextureStage::MODIFIER_INVCOLOR:
+			mod2.x = SubSat(Short4(0x1000), arg2->x);
+			mod2.y = SubSat(Short4(0x1000), arg2->y);
+			mod2.z = SubSat(Short4(0x1000), arg2->z);
+			mod2.w = SubSat(Short4(0x1000), arg2->w);
+
+			arg2 = &mod2;
+			break;
+		case TextureStage::MODIFIER_ALPHA:
+			mod2.x = arg2->w;
+			mod2.y = arg2->w;
+			mod2.z = arg2->w;
+			mod2.w = arg2->w;
+
+			arg2 = &mod2;
+			break;
+		case TextureStage::MODIFIER_INVALPHA:
+			mod2.x = SubSat(Short4(0x1000), arg2->w);
+			mod2.y = SubSat(Short4(0x1000), arg2->w);
+			mod2.z = SubSat(Short4(0x1000), arg2->w);
+			mod2.w = SubSat(Short4(0x1000), arg2->w);
+
+			arg2 = &mod2;
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.thirdModifier)
+		{
+		case TextureStage::MODIFIER_COLOR:
+			break;
+		case TextureStage::MODIFIER_INVCOLOR:
+			mod3.x = SubSat(Short4(0x1000), arg3->x);
+			mod3.y = SubSat(Short4(0x1000), arg3->y);
+			mod3.z = SubSat(Short4(0x1000), arg3->z);
+			mod3.w = SubSat(Short4(0x1000), arg3->w);
+
+			arg3 = &mod3;
+			break;
+		case TextureStage::MODIFIER_ALPHA:
+			mod3.x = arg3->w;
+			mod3.y = arg3->w;
+			mod3.z = arg3->w;
+			mod3.w = arg3->w;
+
+			arg3 = &mod3;
+			break;
+		case TextureStage::MODIFIER_INVALPHA:
+			mod3.x = SubSat(Short4(0x1000), arg3->w);
+			mod3.y = SubSat(Short4(0x1000), arg3->w);
+			mod3.z = SubSat(Short4(0x1000), arg3->w);
+			mod3.w = SubSat(Short4(0x1000), arg3->w);
+
+			arg3 = &mod3;
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.stageOperation)
+		{
+		case TextureStage::STAGE_DISABLE:
+			break;
+		case TextureStage::STAGE_SELECTARG1: // Arg1
+			res.x = arg1->x;
+			res.y = arg1->y;
+			res.z = arg1->z;
+			break;
+		case TextureStage::STAGE_SELECTARG2: // Arg2
+			res.x = arg2->x;
+			res.y = arg2->y;
+			res.z = arg2->z;
+			break;
+		case TextureStage::STAGE_SELECTARG3: // Arg3
+			res.x = arg3->x;
+			res.y = arg3->y;
+			res.z = arg3->z;
+			break;
+		case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
+			res.x = MulHigh(arg1->x, arg2->x) << 4;
+			res.y = MulHigh(arg1->y, arg2->y) << 4;
+			res.z = MulHigh(arg1->z, arg2->z) << 4;
+			break;
+		case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
+			res.x = MulHigh(arg1->x, arg2->x) << 5;
+			res.y = MulHigh(arg1->y, arg2->y) << 5;
+			res.z = MulHigh(arg1->z, arg2->z) << 5;
+			break;
+		case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
+			res.x = MulHigh(arg1->x, arg2->x) << 6;
+			res.y = MulHigh(arg1->y, arg2->y) << 6;
+			res.z = MulHigh(arg1->z, arg2->z) << 6;
+			break;
+		case TextureStage::STAGE_ADD: // Arg1 + Arg2
+			res.x = AddSat(arg1->x, arg2->x);
+			res.y = AddSat(arg1->y, arg2->y);
+			res.z = AddSat(arg1->z, arg2->z);
+			break;
+		case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
+			res.x = AddSat(arg1->x, arg2->x);
+			res.y = AddSat(arg1->y, arg2->y);
+			res.z = AddSat(arg1->z, arg2->z);
+
+			res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+			res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+			res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+			break;
+		case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
+			res.x = AddSat(arg1->x, arg2->x);
+			res.y = AddSat(arg1->y, arg2->y);
+			res.z = AddSat(arg1->z, arg2->z);
+
+			res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+			res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+			res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+
+			res.x = AddSat(res.x, res.x);
+			res.y = AddSat(res.y, res.y);
+			res.z = AddSat(res.z, res.z);
+			break;
+		case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
+			res.x = SubSat(arg1->x, arg2->x);
+			res.y = SubSat(arg1->y, arg2->y);
+			res.z = SubSat(arg1->z, arg2->z);
+			break;
+		case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
+			{
+				Short4 tmp;
+
+				tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
+				tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
+				tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
+			}
+			break;
+		case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
+			res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
+			res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
+			res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
+			break;
+		case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
+			res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
+			res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
+			res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
+			break;
+		case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
+			{
+				Short4 tmp;
+
+				res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp);
+				res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp);
+				res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp);
+
+				res.x = res.x << 6;
+				res.y = res.y << 6;
+				res.z = res.z << 6;
+
+				res.x = AddSat(res.x, res.y);
+				res.x = AddSat(res.x, res.z);
+
+				// Clamp to [0, 1]
+				res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+				res.x = Min(res.x, Short4(0x1000));
+
+				res.y = res.x;
+				res.z = res.x;
+				res.w = res.x;
+			}
+			break;
+		case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
+			res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.current.w) << 4; res.x = AddSat(res.x, arg2->x);
+			res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.current.w) << 4; res.y = AddSat(res.y, arg2->y);
+			res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.current.w) << 4; res.z = AddSat(res.z, arg2->z);
+			break;
+		case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
+			res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
+			res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
+			res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
+			break;
+		case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
+			res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
+			res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
+			res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
+			break;
+		case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
+			res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
+			res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
+			res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
+			break;
+		case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
+			res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
+			res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
+			res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
+			break;
+		case TextureStage::STAGE_PREMODULATE:
+			res.x = arg1->x;
+			res.y = arg1->y;
+			res.z = arg1->z;
+			break;
+		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
+			res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
+			res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
+			res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
+			break;
+		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
+			res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
+			res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
+			res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
+			break;
+		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
+			{
+				Short4 tmp;
+
+				res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
+				res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
+				res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
+			}
+			break;
+		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
+			{
+				Short4 tmp;
+
+				res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
+				res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
+				res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
+			}
+			break;
+		case TextureStage::STAGE_BUMPENVMAP:
+			{
+				r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
+				r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
+
+				Float4 du2;
+				Float4 dv2;
+
+				du2 = r.du;
+				dv2 = r.dv;
+				r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
+				dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
+				r.du += dv2;
+				r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
+				du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
+				r.dv += du2;
+
+				perturbate = true;
+
+				res.x = r.current.x;
+				res.y = r.current.y;
+				res.z = r.current.z;
+				res.w = r.current.w;
+			}
+			break;
+		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
+			{
+				r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
+				r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
+
+				Float4 du2;
+				Float4 dv2;
+
+				du2 = r.du;
+				dv2 = r.dv;
+
+				r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
+				dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
+				r.du += dv2;
+				r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
+				du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
+				r.dv += du2;
+
+				perturbate = true;
+
+				r.L = texture.z;
+				r.L = MulHigh(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
+				r.L = r.L << 4;
+				r.L = AddSat(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
+				r.L = Max(r.L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+				r.L = Min(r.L, Short4(0x1000));
+
+				luminance = true;
+
+				res.x = r.current.x;
+				res.y = r.current.y;
+				res.z = r.current.z;
+				res.w = r.current.w;
+			}
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
+		{
+			switch(textureStage.firstArgumentAlpha)
+			{
+			case TextureStage::SOURCE_TEXTURE:	arg1 = &texture;		break;
+			case TextureStage::SOURCE_CONSTANT:	arg1 = &constant;		break;
+			case TextureStage::SOURCE_CURRENT:	arg1 = &r.current;		break;
+			case TextureStage::SOURCE_DIFFUSE:	arg1 = &r.diffuse;		break;
+			case TextureStage::SOURCE_SPECULAR:	arg1 = &r.specular;		break;
+			case TextureStage::SOURCE_TEMP:		arg1 = &temp;			break;
+			case TextureStage::SOURCE_TFACTOR:	arg1 = &tfactor;		break;
+			default:
+				ASSERT(false);
+			}
+
+			switch(textureStage.secondArgumentAlpha)
+			{
+			case TextureStage::SOURCE_TEXTURE:	arg2 = &texture;		break;
+			case TextureStage::SOURCE_CONSTANT:	arg2 = &constant;		break;
+			case TextureStage::SOURCE_CURRENT:	arg2 = &r.current;		break;
+			case TextureStage::SOURCE_DIFFUSE:	arg2 = &r.diffuse;		break;
+			case TextureStage::SOURCE_SPECULAR:	arg2 = &r.specular;		break;
+			case TextureStage::SOURCE_TEMP:		arg2 = &temp;			break;
+			case TextureStage::SOURCE_TFACTOR:	arg2 = &tfactor;		break;
+			default:
+				ASSERT(false);
+			}
+
+			switch(textureStage.thirdArgumentAlpha)
+			{
+			case TextureStage::SOURCE_TEXTURE:	arg3 = &texture;		break;
+			case TextureStage::SOURCE_CONSTANT:	arg3 = &constant;		break;
+			case TextureStage::SOURCE_CURRENT:	arg3 = &r.current;		break;
+			case TextureStage::SOURCE_DIFFUSE:	arg3 = &r.diffuse;		break;
+			case TextureStage::SOURCE_SPECULAR:	arg3 = &r.specular;		break;
+			case TextureStage::SOURCE_TEMP:		arg3 = &temp;			break;
+			case TextureStage::SOURCE_TFACTOR:	arg3 = &tfactor;		break;
+			default:
+				ASSERT(false);
+			}
+
+			switch(textureStage.firstModifierAlpha)   // FIXME: Check if actually used
+			{
+			case TextureStage::MODIFIER_COLOR:
+				break;
+			case TextureStage::MODIFIER_INVCOLOR:
+				mod1.w = SubSat(Short4(0x1000), arg1->w);
+
+				arg1 = &mod1;
+				break;
+			case TextureStage::MODIFIER_ALPHA:
+				// Redundant
+				break;
+			case TextureStage::MODIFIER_INVALPHA:
+				mod1.w = SubSat(Short4(0x1000), arg1->w);
+
+				arg1 = &mod1;
+				break;
+			default:
+				ASSERT(false);
+			}
+
+			switch(textureStage.secondModifierAlpha)   // FIXME: Check if actually used
+			{
+			case TextureStage::MODIFIER_COLOR:
+				break;
+			case TextureStage::MODIFIER_INVCOLOR:
+				mod2.w = SubSat(Short4(0x1000), arg2->w);
+
+				arg2 = &mod2;
+				break;
+			case TextureStage::MODIFIER_ALPHA:
+				// Redundant
+				break;
+			case TextureStage::MODIFIER_INVALPHA:
+				mod2.w = SubSat(Short4(0x1000), arg2->w);
+
+				arg2 = &mod2;
+				break;
+			default:
+				ASSERT(false);
+			}
+
+			switch(textureStage.thirdModifierAlpha)   // FIXME: Check if actually used
+			{
+			case TextureStage::MODIFIER_COLOR:
+				break;
+			case TextureStage::MODIFIER_INVCOLOR:
+				mod3.w = SubSat(Short4(0x1000), arg3->w);
+
+				arg3 = &mod3;
+				break;
+			case TextureStage::MODIFIER_ALPHA:
+				// Redundant
+				break;
+			case TextureStage::MODIFIER_INVALPHA:
+				mod3.w = SubSat(Short4(0x1000), arg3->w);
+
+				arg3 = &mod3;
+				break;
+			default:
+				ASSERT(false);
+			}
+
+			switch(textureStage.stageOperationAlpha)
+			{
+			case TextureStage::STAGE_DISABLE:
+				break;
+			case TextureStage::STAGE_SELECTARG1: // Arg1
+				res.w = arg1->w;
+				break;
+			case TextureStage::STAGE_SELECTARG2: // Arg2
+				res.w = arg2->w;
+				break;
+			case TextureStage::STAGE_SELECTARG3: // Arg3
+				res.w = arg3->w;
+				break;
+			case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
+				res.w = MulHigh(arg1->w, arg2->w) << 4;
+				break;
+			case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
+				res.w = MulHigh(arg1->w, arg2->w) << 5;
+				break;
+			case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
+				res.w = MulHigh(arg1->w, arg2->w) << 6;
+				break;
+			case TextureStage::STAGE_ADD: // Arg1 + Arg2
+				res.w = AddSat(arg1->w, arg2->w);
+				break;
+			case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
+				res.w = AddSat(arg1->w, arg2->w);
+				res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+				break;
+			case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
+				res.w = AddSat(arg1->w, arg2->w);
+				res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
+				res.w = AddSat(res.w, res.w);
+				break;
+			case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
+				res.w = SubSat(arg1->w, arg2->w);
+				break;
+			case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
+				{
+					Short4 tmp;
+
+					tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
+				}
+				break;
+			case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
+				res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
+				break;
+			case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
+				res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
+				break;
+			case TextureStage::STAGE_DOT3:
+				break;   // Already computed in color channel
+			case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
+				res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.current.w) << 4; res.w = AddSat(res.w, arg2->w);
+				break;
+			case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
+				res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
+				break;
+			case TextureStage::STAGE_BLENDFACTORALPHA:
+				res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(r.data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
+				break;
+			case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
+				res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
+				break;
+			case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
+				res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
+				break;
+			case TextureStage::STAGE_PREMODULATE:
+				res.w = arg1->w;
+				break;
+			case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
+			case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
+			case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
+			case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
+			case TextureStage::STAGE_BUMPENVMAP:
+			case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
+				break;   // Invalid alpha operations
+			default:
+				ASSERT(false);
+			}
+		}
+
+		// Clamp result to [0, 1]
+
+		switch(textureStage.stageOperation)
+		{
+		case TextureStage::STAGE_DISABLE:
+		case TextureStage::STAGE_SELECTARG1:
+		case TextureStage::STAGE_SELECTARG2:
+		case TextureStage::STAGE_SELECTARG3:
+		case TextureStage::STAGE_MODULATE:
+		case TextureStage::STAGE_MODULATE2X:
+		case TextureStage::STAGE_MODULATE4X:
+		case TextureStage::STAGE_ADD:
+		case TextureStage::STAGE_MULTIPLYADD:
+		case TextureStage::STAGE_LERP:
+		case TextureStage::STAGE_BLENDCURRENTALPHA:
+		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
+		case TextureStage::STAGE_BLENDFACTORALPHA:
+		case TextureStage::STAGE_BLENDTEXTUREALPHA:
+		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
+		case TextureStage::STAGE_DOT3:   // Already clamped
+		case TextureStage::STAGE_PREMODULATE:
+		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
+		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
+		case TextureStage::STAGE_BUMPENVMAP:
+		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
+			if(state.textureStage[stage].cantUnderflow)
+			{
+				break;   // Can't go below zero
+			}
+		case TextureStage::STAGE_ADDSIGNED:
+		case TextureStage::STAGE_ADDSIGNED2X:
+		case TextureStage::STAGE_SUBTRACT:
+		case TextureStage::STAGE_ADDSMOOTH:
+			res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+			res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+			res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.stageOperationAlpha)
+		{
+		case TextureStage::STAGE_DISABLE:
+		case TextureStage::STAGE_SELECTARG1:
+		case TextureStage::STAGE_SELECTARG2:
+		case TextureStage::STAGE_SELECTARG3:
+		case TextureStage::STAGE_MODULATE:
+		case TextureStage::STAGE_MODULATE2X:
+		case TextureStage::STAGE_MODULATE4X:
+		case TextureStage::STAGE_ADD:
+		case TextureStage::STAGE_MULTIPLYADD:
+		case TextureStage::STAGE_LERP:
+		case TextureStage::STAGE_BLENDCURRENTALPHA:
+		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
+		case TextureStage::STAGE_BLENDFACTORALPHA:
+		case TextureStage::STAGE_BLENDTEXTUREALPHA:
+		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
+		case TextureStage::STAGE_DOT3:   // Already clamped
+		case TextureStage::STAGE_PREMODULATE:
+		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
+		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
+		case TextureStage::STAGE_BUMPENVMAP:
+		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
+			if(state.textureStage[stage].cantUnderflow)
+			{
+				break;   // Can't go below zero
+			}
+		case TextureStage::STAGE_ADDSIGNED:
+		case TextureStage::STAGE_ADDSIGNED2X:
+		case TextureStage::STAGE_SUBTRACT:
+		case TextureStage::STAGE_ADDSMOOTH:
+			res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.stageOperation)
+		{
+		case TextureStage::STAGE_DISABLE:
+		case TextureStage::STAGE_SELECTARG1:
+		case TextureStage::STAGE_SELECTARG2:
+		case TextureStage::STAGE_SELECTARG3:
+		case TextureStage::STAGE_MODULATE:
+		case TextureStage::STAGE_SUBTRACT:
+		case TextureStage::STAGE_ADDSMOOTH:
+		case TextureStage::STAGE_LERP:
+		case TextureStage::STAGE_BLENDCURRENTALPHA:
+		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
+		case TextureStage::STAGE_BLENDFACTORALPHA:
+		case TextureStage::STAGE_BLENDTEXTUREALPHA:
+		case TextureStage::STAGE_DOT3:   // Already clamped
+		case TextureStage::STAGE_PREMODULATE:
+		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
+		case TextureStage::STAGE_BUMPENVMAP:
+		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
+			break;   // Can't go above one
+		case TextureStage::STAGE_MODULATE2X:
+		case TextureStage::STAGE_MODULATE4X:
+		case TextureStage::STAGE_ADD:
+		case TextureStage::STAGE_ADDSIGNED:
+		case TextureStage::STAGE_ADDSIGNED2X:
+		case TextureStage::STAGE_MULTIPLYADD:
+		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
+		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
+			res.x = Min(res.x, Short4(0x1000));
+			res.y = Min(res.y, Short4(0x1000));
+			res.z = Min(res.z, Short4(0x1000));
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.stageOperationAlpha)
+		{
+		case TextureStage::STAGE_DISABLE:
+		case TextureStage::STAGE_SELECTARG1:
+		case TextureStage::STAGE_SELECTARG2:
+		case TextureStage::STAGE_SELECTARG3:
+		case TextureStage::STAGE_MODULATE:
+		case TextureStage::STAGE_SUBTRACT:
+		case TextureStage::STAGE_ADDSMOOTH:
+		case TextureStage::STAGE_LERP:
+		case TextureStage::STAGE_BLENDCURRENTALPHA:
+		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
+		case TextureStage::STAGE_BLENDFACTORALPHA:
+		case TextureStage::STAGE_BLENDTEXTUREALPHA:
+		case TextureStage::STAGE_DOT3:   // Already clamped
+		case TextureStage::STAGE_PREMODULATE:
+		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
+		case TextureStage::STAGE_BUMPENVMAP:
+		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
+			break;   // Can't go above one
+		case TextureStage::STAGE_MODULATE2X:
+		case TextureStage::STAGE_MODULATE4X:
+		case TextureStage::STAGE_ADD:
+		case TextureStage::STAGE_ADDSIGNED:
+		case TextureStage::STAGE_ADDSIGNED2X:
+		case TextureStage::STAGE_MULTIPLYADD:
+		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
+		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
+		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
+			res.w = Min(res.w, Short4(0x1000));
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		switch(textureStage.destinationArgument)
+		{
+		case TextureStage::DESTINATION_CURRENT:
+			r.current.x = res.x;
+			r.current.y = res.y;
+			r.current.z = res.z;
+			r.current.w = res.w;
+			break;
+		case TextureStage::DESTINATION_TEMP:
+			temp.x = res.x;
+			temp.y = res.y;
+			temp.z = res.z;
+			temp.w = res.w;
+			break;
+		default:
+			ASSERT(false);
+		}
+	}

+

+	void PixelPipeline::fogBlend(Registers &r, Vector4s &current, Float4 &f, Float4 &z, Float4 &rhw)
+	{
+		// Blends the fog color into the x, y and z channels of 'current' (w is left untouched):
+		//   current = MulHigh(current, fog) + MulHigh(0xFFFF - fog, fogColor)
+		// where 'fog' is 'f' converted by convertFixed16(). Does nothing unless state.fogActive is set.
+		if(state.fogActive)
+		{
+			if(state.pixelFogMode != FOG_NONE)
+			{
+				pixelFog(r, f, z, rhw);   // NOTE(review): presumably derives 'f' per pixel from z / rhw - confirm
+			}
+
+			UShort4 fogFactor = convertFixed16(f, true);
+
+			// Scale the incoming color by the fog factor
+			for(int i = 0; i < 3; i++)
+			{
+				current[i] = As<Short4>(MulHigh(As<UShort4>(current[i]), fogFactor));
+			}
+
+			UShort4 fogComplement = UShort4(0xFFFFu) - fogFactor;
+
+			// Add the fog color weighted by the complement of the fog factor
+			for(int i = 0; i < 3; i++)
+			{
+				current[i] += As<Short4>(MulHigh(fogComplement, *Pointer<UShort4>(r.data + OFFSET(DrawData, fog.color4[i]))));
+			}
+		}
+	}

+

+	void PixelPipeline::specularPixel(Vector4s &current, Vector4s &specular)
+	{
+		// Adds 'specular' to the x, y and z channels of 'current' using AddSat.
+		// Only active when state.specularAdd is set; the w channel is never modified.
+		if(state.specularAdd)
+		{
+			for(int i = 0; i < 3; i++)
+			{
+				current[i] = AddSat(current[i], specular[i]);
+			}
+		}
+	}

+

+	void PixelPipeline::sampleTexture(Registers &r, Vector4s &c, int coordinates, int stage, bool project)
+	{
+		// Samples texture 'stage' into 'c' using coordinate set 'coordinates',
+		// which is read from r.vf[2 + coordinates].
+		const int interpolant = 2 + coordinates;
+
+		Float4 u = r.vf[interpolant].x;
+		Float4 v = r.vf[interpolant].y;
+		Float4 w = r.vf[interpolant].z;
+		Float4 q = r.vf[interpolant].w;
+
+		// A pending (du, dv) offset is applied to this lookup only: the flag is cleared once consumed
+		if(perturbate)
+		{
+			perturbate = false;
+
+			u += r.du;
+			v += r.dv;
+		}
+
+		sampleTexture(r, c, stage, u, v, w, q, project);
+	}
+
+	void PixelPipeline::sampleTexture(Registers &r, Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project, bool bias)
+	{
+		// Convenience overload without explicit derivatives: forwards to the full overload
+		// with default-constructed derivative vectors and 'gradients' set to false.
+		Vector4f noDsx;
+		Vector4f noDsy;
+
+		sampleTexture(r, c, stage, u, v, w, q, noDsx, noDsy, project, bias, false);
+	}
+
+	void PixelPipeline::sampleTexture(Registers &r, Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
+	{
+		// Samples the texture bound to 'stage' into 'c' through sampler[stage].
+		// When 'project' is set, u, v and w are multiplied by reciprocal(q) before sampling.
+#if PERF_PROFILE
+		Long startTicks = Ticks();
+#endif
+
+		// Address of this stage's Texture entry inside the draw data
+		Pointer<Byte> stageTexture = r.data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
+
+		if(project)
+		{
+			Float4 invQ = reciprocal(q);
+
+			Float4 projectedU = u * invQ;
+			Float4 projectedV = v * invQ;
+			Float4 projectedW = w * invQ;
+
+			sampler[stage]->sampleTexture(stageTexture, c, projectedU, projectedV, projectedW, q, dsx, dsy, bias, gradients, lodProvided);
+		}
+		else
+		{
+			sampler[stage]->sampleTexture(stageTexture, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
+		}
+
+#if PERF_PROFILE
+		r.cycles[PERF_TEX] += Ticks() - startTicks;
+#endif
+	}
+

+	Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
+	{
+		// Converts floating-point values to the pipeline's fixed-point format, in which
+		// 1.0 is represented by 0x1000: multiply by 0x1000, then RoundShort4().
+		Float4 scaled = cf * Float4(0x1000);
+
+		return RoundShort4(scaled);
+	}
+
+	void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
+	{
+		// Component-wise (x, y, z, w) float to fixed-point conversion using the scalar overload
+		for(int i = 0; i < 4; i++)
+		{
+			cs[i] = convertFixed12(cf[i]);
+		}
+	}
+
+	Float4 PixelPipeline::convertSigned12(Short4 &cs)
+	{
+		// Converts fixed-point values back to floating point by scaling with 1 / 0x0FFE.
+		// NOTE(review): the divisor is 0x0FFE while convertFixed12() multiplies by 0x1000 - confirm this is intentional
+		Float4 value(cs);
+
+		return value * Float4(1.0f / 0x0FFE);
+	}
+
+	void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
+	{
+		// Component-wise (x, y, z, w) fixed-point to float conversion using the scalar overload
+		for(int i = 0; i < 4; i++)
+		{
+			cf[i] = convertSigned12(cs[i]);
+		}
+	}
+

+	void PixelPipeline::writeDestination(Registers &r, Vector4s &d, const Dst &dst)
+	{
+		// Stores 'd' into the register addressed by dst.type / dst.index. Only the components
+		// enabled in dst.mask are written (bit 0 = x, bit 1 = y, bit 2 = z, bit 3 = w).
+		Vector4s *target = 0;
+
+		switch(dst.type)
+		{
+		case Shader::PARAMETER_TEMP:     target = &r.rs[dst.index]; break;
+		case Shader::PARAMETER_INPUT:    // Shares the r.vs registers with PARAMETER_COLOROUT
+		case Shader::PARAMETER_COLOROUT: target = &r.vs[dst.index]; break;
+		case Shader::PARAMETER_TEXTURE:  target = &r.ts[dst.index]; break;
+		case Shader::PARAMETER_CONST:    // Writing to a constant is asserted against, like any unknown type
+		default:
+			ASSERT(false);
+			return;
+		}
+
+		for(int i = 0; i < 4; i++)
+		{
+			if(dst.mask & (1 << i))
+			{
+				(*target)[i] = d[i];
+			}
+		}
+	}
+

+	Vector4s PixelPipeline::fetchRegisterS(Registers &r, const Src &src)
+	{
+		// Reads source operand 'src' as fixed-point values (0x1000 represents 1.0, 0x0800 represents 0.5),
+		// applies its swizzle and source modifier, and returns the result by value.
+		// PARAMETER_VOID, PARAMETER_FLOAT4LITERAL and unknown operand types yield r.rs[0] as a dummy.
+		Vector4s *reg = 0;
+		const int i = src.index;
+
+		Vector4s c;   // Receives the constant register's value when the operand is PARAMETER_CONST
+
+		switch(src.type)
+		{
+		case Shader::PARAMETER_TEMP:    reg = &r.rs[i]; break;
+		case Shader::PARAMETER_INPUT:   reg = &r.vs[i]; break;
+		case Shader::PARAMETER_TEXTURE: reg = &r.ts[i]; break;
+		case Shader::PARAMETER_CONST:
+			c.x = *Pointer<Short4>(r.data + OFFSET(DrawData, ps.cW[i][0]));
+			c.y = *Pointer<Short4>(r.data + OFFSET(DrawData, ps.cW[i][1]));
+			c.z = *Pointer<Short4>(r.data + OFFSET(DrawData, ps.cW[i][2]));
+			c.w = *Pointer<Short4>(r.data + OFFSET(DrawData, ps.cW[i][3]));
+			reg = &c;
+			break;
+		case Shader::PARAMETER_VOID:          return r.rs[0]; // Dummy
+		case Shader::PARAMETER_FLOAT4LITERAL: return r.rs[0]; // Dummy
+		default:
+			ASSERT(false);
+			return r.rs[0];   // Dummy; 'reg' must never be dereferenced unset if ASSERT is compiled out
+		}
+
+		// Swizzle: two selector bits per destination component, starting with x in the lowest bits
+		const Short4 *in[4];
+
+		for(int k = 0; k < 4; k++)
+		{
+			in[k] = &(*reg)[(src.swizzle >> (2 * k)) & 0x3];
+		}
+
+		Vector4s mod;
+
+		switch(src.modifier)
+		{
+		case Shader::MODIFIER_NONE:
+		case Shader::MODIFIER_DZ:   // Projection performed by texture sampler
+		case Shader::MODIFIER_DW:   // Projection performed by texture sampler
+			for(int k = 0; k < 4; k++) mod[k] = *in[k];
+			break;
+		case Shader::MODIFIER_BIAS:   // x - 0.5
+			for(int k = 0; k < 4; k++) mod[k] = SubSat(*in[k], Short4(0x0800, 0x0800, 0x0800, 0x0800));
+			break;
+		case Shader::MODIFIER_BIAS_NEGATE:   // 0.5 - x
+			for(int k = 0; k < 4; k++) mod[k] = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), *in[k]);
+			break;
+		case Shader::MODIFIER_COMPLEMENT:   // 1 - x
+			for(int k = 0; k < 4; k++) mod[k] = SubSat(Short4(0x1000), *in[k]);
+			break;
+		case Shader::MODIFIER_NEGATE:   // -x
+			for(int k = 0; k < 4; k++) mod[k] = -*in[k];
+			break;
+		case Shader::MODIFIER_X2:   // x + x
+			for(int k = 0; k < 4; k++) mod[k] = AddSat(*in[k], *in[k]);
+			break;
+		case Shader::MODIFIER_X2_NEGATE:   // -(x + x)
+			for(int k = 0; k < 4; k++) mod[k] = -AddSat(*in[k], *in[k]);
+			break;
+		case Shader::MODIFIER_SIGN:   // (x - 0.5) doubled
+			for(int k = 0; k < 4; k++)
+			{
+				mod[k] = SubSat(*in[k], Short4(0x0800, 0x0800, 0x0800, 0x0800));
+				mod[k] = AddSat(mod[k], mod[k]);
+			}
+			break;
+		case Shader::MODIFIER_SIGN_NEGATE:   // (0.5 - x) doubled
+			for(int k = 0; k < 4; k++)
+			{
+				mod[k] = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), *in[k]);
+				mod[k] = AddSat(mod[k], mod[k]);
+			}
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		// Doubled constants are clamped to the [-0x1000, 0x1000] range
+		if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
+		{
+			for(int k = 0; k < 4; k++)
+			{
+				mod[k] = Min(mod[k], Short4(0x1000));
+				mod[k] = Max(mod[k], Short4(-0x1000, -0x1000, -0x1000, -0x1000));
+			}
+		}
+
+		return mod;
+	}
+

+	void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
+	{
+		// dst = src0, copied one component (x, y, z, w) at a time
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = src0[i];
+		}
+	}
+
+	void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
+	{
+		// dst = AddSat(src0, src1) for each component (x, y, z, w)
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = AddSat(src0[i], src1[i]);
+		}
+	}
+
+	void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
+	{
+		// dst = SubSat(src0, src1) for each component (x, y, z, w)
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = SubSat(src0[i], src1[i]);
+		}
+	}
+
+	void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
+	{
+		// dst = src0 * src1 + src2 for each component (x, y, z, w).
+		// The MulHigh() product is rescaled by adding it to itself four times with AddSat()
+		// before src2 is added. Each step is written to dst before the next one reads it.
+		// FIXME: Long fixed-point multiply fixup
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = MulHigh(src0[i], src1[i]);
+
+			for(int n = 0; n < 4; n++)
+			{
+				dst[i] = AddSat(dst[i], dst[i]);
+			}
+
+			dst[i] = AddSat(dst[i], src2[i]);
+		}
+	}
+
+	void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
+	{
+		// dst = src0 * src1 for each component (x, y, z, w).
+		// The MulHigh() product is rescaled by adding it to itself four times with AddSat().
+		// FIXME: Long fixed-point multiply fixup
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = MulHigh(src0[i], src1[i]);
+
+			for(int n = 0; n < 4; n++)
+			{
+				dst[i] = AddSat(dst[i], dst[i]);
+			}
+		}
+	}
+
+	void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
+	{
+		// Dot product of the x, y and z components of src0 and src1, accumulated with AddSat()
+		// and replicated into all four components of dst.
+		Short4 sum;
+		Short4 term;
+
+		// FIXME: Long fixed-point multiply fixup
+		for(int i = 0; i < 3; i++)
+		{
+			Short4 &product = (i == 0) ? sum : term;   // The first product seeds the sum directly
+
+			product = MulHigh(src0[i], src1[i]);
+
+			for(int n = 0; n < 4; n++)   // Rescale the product: four AddSat() self-additions
+			{
+				product = AddSat(product, product);
+			}
+
+			if(i > 0)
+			{
+				sum = AddSat(sum, term);
+			}
+		}
+
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = sum;
+		}
+	}
+
+	void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
+	{
+		// Dot product of all four components of src0 and src1, accumulated with AddSat()
+		// and replicated into all four components of dst.
+		Short4 sum;
+		Short4 term;
+
+		// FIXME: Long fixed-point multiply fixup
+		for(int i = 0; i < 4; i++)
+		{
+			Short4 &product = (i == 0) ? sum : term;   // The first product seeds the sum directly
+
+			product = MulHigh(src0[i], src1[i]);
+
+			for(int n = 0; n < 4; n++)   // Rescale the product: four AddSat() self-additions
+			{
+				product = AddSat(product, product);
+			}
+
+			if(i > 0)
+			{
+				sum = AddSat(sum, term);
+			}
+		}
+
+		for(int i = 0; i < 4; i++)
+		{
+			dst[i] = sum;
+		}
+	}
+
+	void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)   // Per component: dst = (src1 - src2) * src0 + src2, i.e. src0 weights src1 against src2
+	{
+		// FIXME: Long fixed-point multiply fixup (the product is MulHigh followed by four AddSat doublings, i.e. x16; the subtraction and final addition use SubSat/AddSat)
+		{ dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
+		{
+		dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
+	}
+		{dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
+		{dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
+	}
+
+	void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)   // Writes (u, v, s), clamped to [0, 1] and converted with convertFixed12, to dst.xyz; dst.w is set to 0x1000
+	{
+		Float4 uw;   // Clamped copies of the inputs
+		Float4 vw;
+		Float4 sw;
+
+		if(state.interpolant[2 + coordinate].component & 0x01)   // Bit 0 of the interpolant's component mask selects x
+		{
+			uw = Max(u, Float4(0.0f));
+			uw = Min(uw, Float4(1.0f));
+			dst.x = convertFixed12(uw);
+		}
+		else
+		{
+			dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);   // Component not selected: write zero
+		}
+
+		if(state.interpolant[2 + coordinate].component & 0x02)   // Bit 1 selects y
+		{
+			vw = Max(v, Float4(0.0f));
+			vw = Min(vw, Float4(1.0f));
+			dst.y = convertFixed12(vw);
+		}
+		else
+		{
+			dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
+		}
+
+		if(state.interpolant[2 + coordinate].component & 0x04)   // Bit 2 selects z
+		{
+			sw = Max(s, Float4(0.0f));
+			sw = Min(sw, Float4(1.0f));
+			dst.z = convertFixed12(sw);
+		}
+		else
+		{
+			dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
+		}
+
+		dst.w = Short4(0x1000);   // NOTE(review): 0x1000 presumably represents 1.0 in the 12-bit fixed-point format - confirm
+	}
+
+	void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)   // Writes (u, v, s) scaled by 0x1000 to dst.xyz, optionally projecting u and v by s first; dst.w is not written here
+	{
+		Float4 uw = u;
+		Float4 vw = v;
+		Float4 sw = s;
+
+		if(project)
+		{
+			uw *= Rcp_pp(s);   // NOTE(review): Rcp_pp is presumably an approximate reciprocal, making this u / s - confirm
+			vw *= Rcp_pp(s);   // Only u and v are projected; sw keeps the original s
+		}
+
+		if(state.interpolant[2 + coordinate].component & 0x01)   // Bit 0 of the interpolant's component mask selects x
+		{
+			uw *= Float4(0x1000);
+			uw = Max(uw, Float4(-0x8000));   // Clamp to the signed 16-bit range before rounding to Short4
+			uw = Min(uw, Float4(0x7FFF));
+			dst.x = RoundShort4(uw);
+		}
+		else
+		{
+			dst.x = Short4(0x0000);   // Component not selected: write zero
+		}
+
+		if(state.interpolant[2 + coordinate].component & 0x02)   // Bit 1 selects y
+		{
+			vw *= Float4(0x1000);
+			vw = Max(vw, Float4(-0x8000));
+			vw = Min(vw, Float4(0x7FFF));
+			dst.y = RoundShort4(vw);
+		}
+		else
+		{
+			dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
+		}
+
+		if(state.interpolant[2 + coordinate].component & 0x04)   // Bit 2 selects z
+		{
+			sw *= Float4(0x1000);
+			sw = Max(sw, Float4(-0x8000));
+			sw = Min(sw, Float4(0x7FFF));
+			dst.z = RoundShort4(sw);
+		}
+		else
+		{
+			dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
+		}
+	}
+
+	void PixelPipeline::TEXDP3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)   // Writes the component-0 result of TEXM3X3PAD, scaled by 0x1000 and rounded, to all four components of dst
+	{
+		TEXM3X3PAD(r, u, v, s, src, 0, false);   // Leaves the result in r.u_
+
+		Short4 t0 = RoundShort4(r.u_ * Float4(0x1000));
+
+		dst.x = t0;
+		dst.y = t0;
+		dst.z = t0;
+		dst.w = t0;
+	}
+
+	void PixelPipeline::TEXDP3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)   // Samples texture 'stage' at (r.u_, 0, 0), where r.u_ is the component-0 result of TEXM3X3PAD
+	{
+		TEXM3X3PAD(r, u, v, s, src0, 0, false);   // Leaves the result in r.u_
+
+		r.v_ = Float4(0.0f);   // Remaining coordinates are zeroed
+		r.w_ = Float4(0.0f);
+
+		sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
+	}
+
+	void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)   // Masks cMask with a bit pattern derived from comparing u, v and s against zero
+	{
+		Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &   // NOTE(review): presumably one bit per lane, set only when u, v and s are all not less than zero - confirm SignMask/CmpNLT semantics
+			SignMask(CmpNLT(v, Float4(0.0f))) &
+			SignMask(CmpNLT(s, Float4(0.0f)));
+
+		for(unsigned int q = 0; q < state.multiSample; q++)   // Apply the same mask to each of the state.multiSample entries
+		{
+			cMask[q] &= kill;
+		}
+	}
+
+	void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)   // Masks cMask with a bit pattern derived from the x, y and z components of src
+	{
+		Short4 test = src.x | src.y | src.z;   // OR of the three components (the sign bit is set if any of them has it set)
+		Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;   // NOTE(review): presumably the low four bits end up set for lanes with no negative component - confirm Pack/SignMask semantics
+
+		for(unsigned int q = 0; q < state.multiSample; q++)   // Apply the same mask to each of the state.multiSample entries
+		{
+			cMask[q] &= kill;
+		}
+	}
+
+	void PixelPipeline::TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)   // Samples texture 'sampler' at (u, v, s) into dst
+	{
+		sampleTexture(r, dst, sampler, u, v, s, s, project);   // s is passed for both the w and q arguments
+	}
+
+	void PixelPipeline::TEXLD(Registers &r, Vector4s &dst, Vector4s &src, int sampler, bool project)   // Samples texture 'sampler' with coordinates taken from src.xyz converted to float
+	{
+		Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);   // NOTE(review): the divisor is 0x0FFE rather than 0x1000; the reason is not visible here
+		Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
+		Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
+
+		sampleTexture(r, dst, sampler, u, v, s, s, project);   // s is passed for both the w and q arguments
+	}
+
+	void PixelPipeline::TEXBEM(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)   // Samples texture 'stage' at (u, v) offset by src.xy transformed through the stage's bump-map matrix
+	{
+		Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);   // Offset inputs converted from the Short4 register to float
+		Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
+
+		Float4 du2 = du;   // Untransformed copies, needed for the cross terms
+		Float4 dv2 = dv;
+
+		du *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
+		dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
+		du += dv2;   // du = du * M[0][0] + dv * M[1][0]
+		dv *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
+		du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
+		dv += du2;   // dv = dv * M[1][1] + du * M[0][1]
+
+		Float4 u_ = u + du;   // Perturbed coordinates
+		Float4 v_ = v + dv;
+
+		sampleTexture(r, dst, stage, u_, v_, s, s);
+	}
+
+	void PixelPipeline::TEXBEML(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)   // Same perturbed lookup as TEXBEM, then scales dst.xyz by a luminance factor derived from src.z
+	{
+		Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);   // Offset inputs converted from the Short4 register to float
+		Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
+
+		Float4 du2 = du;   // Untransformed copies, needed for the cross terms
+		Float4 dv2 = dv;
+
+		du *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
+		dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
+		du += dv2;   // du = du * M[0][0] + dv * M[1][0]
+		dv *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
+		du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
+		dv += du2;   // dv = dv * M[1][1] + du * M[0][1]
+
+		Float4 u_ = u + du;   // Perturbed coordinates
+		Float4 v_ = v + dv;
+
+		sampleTexture(r, dst, stage, u_, v_, s, s);
+
+		Short4 L;   // Luminance factor
+
+		L = src.z;
+		L = MulHigh(L, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));   // src.z times the stage's luminance scale (MulHigh, then << 4)
+		L = L << 4;
+		L = AddSat(L, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));   // Plus the stage's luminance offset
+		L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));   // Clamp L to [0, 0x1000]
+		L = Min(L, Short4(0x1000));
+
+		dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;   // Scale the sampled x, y and z by L; dst.w is left as sampled
+		dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
+		dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
+	}
+
+	void PixelPipeline::TEXREG2AR(Registers &r, Vector4s &dst, Vector4s &src0, int stage)   // Samples texture 'stage' using src0.w as u and src0.x as v
+	{
+		Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);   // u comes from the w component
+		Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);   // v comes from the x component
+		Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);   // Third coordinate comes from z
+
+		sampleTexture(r, dst, stage, u, v, s, s);
+	}
+
+	void PixelPipeline::TEXREG2GB(Registers &r, Vector4s &dst, Vector4s &src0, int stage)   // Samples texture 'stage' using src0.y as u and src0.z as v
+	{
+		Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);   // u comes from the y component
+		Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);   // v comes from the z component
+		Float4 s = v;   // Third coordinate reuses v
+
+		sampleTexture(r, dst, stage, u, v, s, s);
+	}
+
+	void PixelPipeline::TEXREG2RGB(Registers &r, Vector4s &dst, Vector4s &src0, int stage)   // Samples texture 'stage' using src0.x, src0.y and src0.z as the three coordinates
+	{
+		Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
+		Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
+		Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
+
+		sampleTexture(r, dst, stage, u, v, s, s);   // s is passed for both the w and q arguments
+	}
+
+	void PixelPipeline::TEXM3X2DEPTH(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)   // Sets r.oDepth to r.u_ / r.v_ after computing r.v_ (component 1); dst is not written
+	{
+		TEXM3X2PAD(r, u, v, s, src, 1, signedScaling);   // Fills r.v_; r.u_ is presumably left by an earlier component-0 pad - confirm
+
+		// z / w
+		r.u_ *= Rcp_pp(r.v_);   // FIXME: Set result to 1.0 when division by zero
+
+		r.oDepth = r.u_;
+	}
+
+	void PixelPipeline::TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)   // Forwards unchanged to TEXM3X3PAD
+	{
+		TEXM3X3PAD(r, u, v, s, src0, component, signedScaling);
+	}
+
+	void PixelPipeline::TEXM3X2TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)   // Computes r.v_ (component 1) and samples texture 'stage' at (r.u_, r.v_, 0)
+	{
+		TEXM3X2PAD(r, u, v, s, src0, 1, signedScaling);   // Fills r.v_; r.u_ is presumably left by an earlier component-0 pad - confirm
+
+		r.w_ = Float4(0.0f);   // Third coordinate is zeroed
+
+		sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
+	}
+
+	void PixelPipeline::TEXM3X3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)   // Computes r.w_ (component 2) and writes (r.u_, r.v_, r.w_) scaled by 0x1000 to dst.xyz
+	{
+		TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);   // Fills r.w_; r.u_ and r.v_ are presumably left by earlier pads - confirm
+
+		dst.x = RoundShort4(r.u_ * Float4(0x1000));
+		dst.y = RoundShort4(r.v_ * Float4(0x1000));
+		dst.z = RoundShort4(r.w_ * Float4(0x1000));
+		dst.w = Short4(0x1000);   // Same value as the 0x1000 scale factor used above
+	}
+
+	void PixelPipeline::TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)   // Computes (r.U * u + r.V * v + r.W * s) / 0x1000 and stores it in r.u_, r.v_ or r.w_ for component 0, 1 or 2
+	{
+		if(component == 0 || previousScaling != signedScaling)   // FIXME: Other source modifiers? (r.U/V/W are reloaded from src0 only for component 0 or when the scaling flag differs from the previous call)
+		{
+			r.U = Float4(src0.x);   // Cache src0.xyz as floats for this and later components
+			r.V = Float4(src0.y);
+			r.W = Float4(src0.z);
+
+			previousScaling = signedScaling;
+		}
+
+		Float4 x = r.U * u + r.V * v + r.W * s;   // Dot product of the cached vector with (u, v, s)
+
+		x *= Float4(1.0f / 0x1000);
+
+		switch(component)
+		{
+		case 0:	r.u_ = x; break;
+		case 1:	r.v_ = x; break;
+		case 2: r.w_ = x; break;
+		default: ASSERT(false);   // Only components 0 to 2 are valid
+		}
+	}
+
+	void PixelPipeline::TEXM3X3SPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)   // Computes r.w_ (component 2), reflects the eye vector taken from src1 about N = (r.u_, r.v_, r.w_) and samples texture 'stage' with the result
+	{
+		TEXM3X3PAD(r, u, v, s, src0, 2, false);
+
+		Float4 E[3];   // Eye vector
+
+		E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);   // Converted from the Short4 register to float
+		E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
+		E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
+
+		// Reflection
+		Float4 u__;
+		Float4 v__;
+		Float4 w__;
+
+		// (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
+		u__ = r.u_ * E[0];
+		v__ = r.v_ * E[1];
+		w__ = r.w_ * E[2];
+		u__ += v__ + w__;   // N . E
+		u__ += u__;   // 2 * (N . E)
+		v__ = u__;
+		w__ = u__;
+		u__ *= r.u_;   // 2 * (N . E) * N
+		v__ *= r.v_;
+		w__ *= r.w_;
+		r.u_ *= r.u_;   // From here on r.u_, r.v_ and r.w_ are overwritten with squares
+		r.v_ *= r.v_;
+		r.w_ *= r.w_;
+		r.u_ += r.v_ + r.w_;   // N . N, held in r.u_
+		u__ -= E[0] * r.u_;   // Subtract E * (N . N)
+		v__ -= E[1] * r.u_;
+		w__ -= E[2] * r.u_;
+
+		sampleTexture(r, dst, stage, u__, v__, w__, w__);
+	}
+
+	void PixelPipeline::TEXM3X3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)   // Computes r.w_ (component 2) and samples texture 'stage' at (r.u_, r.v_, r.w_)
+	{
+		TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);   // Fills r.w_; r.u_ and r.v_ are presumably left by earlier pads - confirm
+
+		sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
+	}
+
+	void PixelPipeline::TEXM3X3VSPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)   // Like TEXM3X3SPEC, but the eye vector is read from the w components of three r.vf entries instead of a source register
+	{
+		TEXM3X3PAD(r, u, v, s, src0, 2, false);
+
+		Float4 E[3];   // Eye vector
+
+		E[0] = r.vf[2 + stage - 2].w;   // w of the entries for stage - 2, stage - 1 and stage (r.vf[2 + n] is presumably texture coordinate set n - confirm)
+		E[1] = r.vf[2 + stage - 1].w;
+		E[2] = r.vf[2 + stage - 0].w;
+
+		// Reflection
+		Float4 u__;
+		Float4 v__;
+		Float4 w__;
+
+		// (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
+		u__ = r.u_ * E[0];
+		v__ = r.v_ * E[1];
+		w__ = r.w_ * E[2];
+		u__ += v__ + w__;   // N . E
+		u__ += u__;   // 2 * (N . E)
+		v__ = u__;
+		w__ = u__;
+		u__ *= r.u_;   // 2 * (N . E) * N
+		v__ *= r.v_;
+		w__ *= r.w_;
+		r.u_ *= r.u_;   // From here on r.u_, r.v_ and r.w_ are overwritten with squares
+		r.v_ *= r.v_;
+		r.w_ *= r.w_;
+		r.u_ += r.v_ + r.w_;   // N . N, held in r.u_
+		u__ -= E[0] * r.u_;   // Subtract E * (N . N)
+		v__ -= E[1] * r.u_;
+		w__ -= E[2] * r.u_;
+
+		sampleTexture(r, dst, stage, u__, v__, w__, w__);
+	}
+
+	void PixelPipeline::TEXDEPTH(Registers &r)   // Sets r.oDepth to rs[5].x / rs[5].y
+	{
+		r.u_ = Float4(r.rs[5].x);   // Numerator and denominator come from register rs[5], converted to float
+		r.v_ = Float4(r.rs[5].y);
+
+		// z / w
+		r.u_ *= Rcp_pp(r.v_);   // FIXME: Set result to 1.0 when division by zero
+
+		r.oDepth = r.u_;
+	}
+
+	void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)   // Per component: dst = (src0 > 0x0800) ? src1 : src2, selected with the CmpGT result as a bit mask
+	{
+		{ Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };   // x (assumes CmpGT yields an all-ones lane where the comparison holds - confirm)
+		{Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };   // y
+		{Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };   // z
+		{Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };   // w
+	}
+
+	void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)   // Per component: dst = (0 > src0) ? src2 : src1, selected with the CmpGT result as a bit mask
+	{
+		{ Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };   // x (assumes CmpGT yields an all-ones lane where the comparison holds - confirm)
+		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };   // y
+		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };   // z
+		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };   // w
+	}
+
+	void PixelPipeline::BEM(Registers &r, Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)   // Adds src1.xy, transformed by the stage's bump-map matrix, to src0.xy; only dst.x and dst.y are written
+	{
+		Short4 t0;   // Accumulator for the component being computed
+		Short4 t1;   // Second product term
+
+		// dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
+		t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4;   // FIXME: Matrix components range? Overflow hazard.
+		t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4;   // FIXME: Matrix components range? Overflow hazard.
+		t0 = AddSat(t0, t1);
+		t0 = AddSat(t0, src0.x);
+		dst.x = t0;
+
+		// dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
+		t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4;   // FIXME: Matrix components range? Overflow hazard.
+		t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4;   // FIXME: Matrix components range? Overflow hazard.
+		t0 = AddSat(t0, t1);
+		t0 = AddSat(t0, src0.y);
+		dst.y = t0;
+	}
+}

+

diff --git a/src/Shader/PixelPipeline.hpp b/src/Shader/PixelPipeline.hpp
new file mode 100644
index 0000000..2b49ad7
--- /dev/null
+++ b/src/Shader/PixelPipeline.hpp
@@ -0,0 +1,118 @@
+// SwiftShader Software Renderer

+//

+// Copyright(c) 2015 Google Inc.

+//

+// All rights reserved. No part of this software may be copied, distributed, transmitted,

+// transcribed, stored in a retrieval system, translated into any human or computer

+// language by any means, or disclosed to third parties without the explicit written

+// agreement of Google Inc. Without such an agreement, no rights or licenses, express

+// or implied, including but not limited to any patent rights, are granted to you.

+//

+

+#ifndef sw_PixelPipeline_hpp

+#define sw_PixelPipeline_hpp

+

+#include "PixelRoutine.hpp"

+

+namespace sw

+{

+	class PixelPipeline : public PixelRoutine   // Pixel routine variant whose registers and instructions operate on Vector4s values

+	{

+	public:

+		PixelPipeline(const PixelProcessor::State &state, const PixelShader *shader) :

+			PixelRoutine(state, shader), perturbate(false), luminance(false), previousScaling(false) {}

+		virtual ~PixelPipeline() {}

+

+	protected:

+		virtual void setBuiltins(PixelRoutine::Registers &r, Int &x, Int &y, Float4(&z)[4], Float4 &w);

+		virtual void applyShader(PixelRoutine::Registers &r, Int cMask[4]);

+		virtual Bool alphaTest(PixelRoutine::Registers &r, Int cMask[4]);

+		virtual void rasterOperation(PixelRoutine::Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);

+		virtual QuadRasterizer::Registers* createRegisters(const PixelShader *shader) { return new PixelPipeline::Registers(shader); }   // Allocates the pipeline-specific register set; the caller receives a raw owning pointer

+

+	private:

+		struct Registers : public PixelRoutine::Registers   // Adds the Vector4s register files and per-instruction temporaries to the base register set
+		{
+			Registers(const PixelShader *shader) : PixelRoutine::Registers(shader), current(rs[0]), diffuse(vs[0]), specular(vs[1]) {}
+
+			Vector4s &current;    // Alias of rs[0]
+			Vector4s &diffuse;    // Alias of vs[0]
+			Vector4s &specular;   // Alias of vs[1]
+
+			Vector4s rs[6];
+			Vector4s vs[2];
+			Vector4s ts[6];
+
+			// bem(l) offsets and luminance
+			Float4 du;
+			Float4 dv;
+			Short4 L;
+
+			// texm3x3 temporaries
+			Float4 u_; // FIXME
+			Float4 v_; // FIXME
+			Float4 w_; // FIXME
+			Float4 U;  // FIXME
+			Float4 V;  // FIXME
+			Float4 W;  // FIXME
+		};

+

+		void fixedFunction(Registers& r);

+		void blendTexture(Registers &r, Vector4s &temp, Vector4s &texture, int stage);

+		void fogBlend(Registers &r, Vector4s &current, Float4 &fog, Float4 &z, Float4 &rhw);

+		void specularPixel(Vector4s &current, Vector4s &specular);

+

+		void sampleTexture(Registers &r, Vector4s &c, int coordinates, int sampler, bool project = false);
+		void sampleTexture(Registers &r, Vector4s &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project = false, bool bias = false);
+		void sampleTexture(Registers &r, Vector4s &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false);

+

+		Short4 convertFixed12(RValue<Float4> cf);
+		void convertFixed12(Vector4s &cs, Vector4f &cf);
+		Float4 convertSigned12(Short4 &cs);
+		void convertSigned12(Vector4f &cf, Vector4s &cs);

+

+		void writeDestination(Registers &r, Vector4s &d, const Dst &dst);

+		Vector4s fetchRegisterS(Registers &r, const Src &src);
+

+		// Instructions
+		void MOV(Vector4s &dst, Vector4s &src0);
+		void ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1);
+		void SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1);
+		void MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
+		void MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1);
+		void DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1);
+		void DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1);
+		void LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
+		void TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate);
+		void TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project);
+		void TEXDP3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src);
+		void TEXDP3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0);
+		void TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s);
+		void TEXKILL(Int cMask[4], Vector4s &dst);
+		void TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, bool project);
+		void TEXLD(Registers &r, Vector4s &dst, Vector4s &src, int stage, bool project);
+		void TEXBEM(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage);
+		void TEXBEML(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage);
+		void TEXREG2AR(Registers &r, Vector4s &dst, Vector4s &src0, int stage);
+		void TEXREG2GB(Registers &r, Vector4s &dst, Vector4s &src0, int stage);
+		void TEXREG2RGB(Registers &r, Vector4s &dst, Vector4s &src0, int stage);
+		void TEXM3X2DEPTH(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling);
+		void TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling);
+		void TEXM3X2TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling);
+		void TEXM3X3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling);
+		void TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling);
+		void TEXM3X3SPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1);
+		void TEXM3X3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling);
+		void TEXM3X3VSPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0);
+		void TEXDEPTH(Registers &r);
+		void CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
+		void CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
+		void BEM(Registers &r, Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage);

+

+		bool perturbate;        // Initialized to false by the constructor
+		bool luminance;         // Initialized to false by the constructor
+		bool previousScaling;   // signedScaling value seen by the last TEXM3X3PAD call that reloaded its cached source vector
+	};

+}

+

+#endif

diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
new file mode 100644
index 0000000..1dbab7a
--- /dev/null
+++ b/src/Shader/PixelProgram.cpp
@@ -0,0 +1,1559 @@
+// SwiftShader Software Renderer

+//

+// Copyright(c) 2015 Google Inc.

+//

+// All rights reserved. No part of this software may be copied, distributed, transmitted,

+// transcribed, stored in a retrieval system, translated into any human or computer

+// language by any means, or disclosed to third parties without the explicit written

+// agreement of Google Inc. Without such an agreement, no rights or licenses, express

+// or implied, including but not limited to any patent rights, are granted to you.

+//

+

+#include "PixelProgram.hpp"

+#include "Primitive.hpp"

+#include "Renderer.hpp"

+#include "SamplerCore.hpp"

+

+namespace sw

+{

+	extern bool postBlendSRGB;   // Declared here only; the definition lives outside this declaration block

+	extern bool booleanFaceRegister;        // When set, setBuiltins fills vFace with the CmpNLT(area, 0) mask instead of the raw primitive area
+	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates (setBuiltins then offsets vPos.x/y by 0.5)
+	extern bool fullPixelPositionRegister;  // When set, setBuiltins also fills vPos.z and vPos.w

+

+	void PixelProgram::setBuiltins(PixelRoutine::Registers &rBase, Int &x, Int &y, Float4(&z)[4], Float4 &w)   // Fills the vPos and vFace registers when the shader declares them (shader version 0x0300 and up only)

+	{

+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast the base register set to this class's Registers type

+

+		if(shader->getVersion() >= 0x0300)
+		{
+			if(shader->vPosDeclared)
+			{
+				if(!halfIntegerCoordinates)
+				{
+					r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);   // The four lanes get offsets (0,0), (1,0), (0,1), (1,1) from (x, y)
+					r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
+				}
+				else
+				{
+					r.vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);   // Same lane offsets plus 0.5
+					r.vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
+				}
+
+				if(fullPixelPositionRegister)
+				{
+					r.vPos.z = z[0]; // FIXME: Centroid?
+					r.vPos.w = w;    // FIXME: Centroid?
+				}
+			}
+
+			if(shader->vFaceDeclared)
+			{
+				Float4 area = *Pointer<Float>(r.primitive + OFFSET(Primitive, area));   // Read Primitive::area through the primitive pointer
+				Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;   // Either the (area not less than 0) comparison mask reinterpreted as float, or the raw area
+
+				r.vFace.x = face;   // The same value goes to every component
+				r.vFace.y = face;
+				r.vFace.z = face;
+				r.vFace.w = face;
+			}
+		}

+	}

+

+	void PixelProgram::applyShader(PixelRoutine::Registers &rBase, Int cMask[4])
+	{

+		Registers& r = *static_cast<Registers*>(&rBase);
+
+		r.enableIndex = 0;
+		r.stackIndex = 0;
+
+		if(shader->containsLeaveInstruction())
+		{
+			r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+		}
+
+		bool out[4][4] = { false };
+
+		// Create all call site return blocks up front
+		for(size_t i = 0; i < shader->getLength(); i++)
+		{
+			const Shader::Instruction *instruction = shader->getInstruction(i);
+			Shader::Opcode opcode = instruction->opcode;
+
+			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
+			{
+				const Dst &dst = instruction->dst;
+
+				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
+				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
+			}
+		}
+
+		for(size_t i = 0; i < shader->getLength(); i++)
+		{
+			const Shader::Instruction *instruction = shader->getInstruction(i);
+			Shader::Opcode opcode = instruction->opcode;
+
+			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
+			{
+				continue;
+			}
+
+			const Dst &dst = instruction->dst;
+			const Src &src0 = instruction->src[0];
+			const Src &src1 = instruction->src[1];
+			const Src &src2 = instruction->src[2];
+			const Src &src3 = instruction->src[3];
+
+			bool predicate = instruction->predicate;
+			Control control = instruction->control;
+			bool pp = dst.partialPrecision;
+			bool project = instruction->project;
+			bool bias = instruction->bias;
+
+			Vector4f d;
+			Vector4f s0;
+			Vector4f s1;
+			Vector4f s2;
+			Vector4f s3;
+
+			if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
+			{
+				if(dst.type == Shader::PARAMETER_TEXTURE)
+				{
+					d.x = r.vf[2 + dst.index].x;
+					d.y = r.vf[2 + dst.index].y;
+					d.z = r.vf[2 + dst.index].z;
+					d.w = r.vf[2 + dst.index].w;
+				}
+				else
+				{
+					d = r.rf[dst.index];
+				}
+			}
+
+			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
+			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
+			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
+			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
+
+			switch(opcode)
+			{
+			case Shader::OPCODE_PS_2_0:                                                    break;
+			case Shader::OPCODE_PS_2_x:                                                    break;
+			case Shader::OPCODE_PS_3_0:                                                    break;
+			case Shader::OPCODE_DEF:                                                       break;
+			case Shader::OPCODE_DCL:                                                       break;
+			case Shader::OPCODE_NOP:                                                       break;
+			case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
+			case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
+			case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
+			case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
+			case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
+			case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
+			case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
+			case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
+			case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
+			case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
+			case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
+			case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
+			case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
+			case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
+			case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
+			case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
+			case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
+			case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
+			case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
+			case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
+			case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
+			case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
+			case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
+			case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
+			case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
+			case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
+			case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
+			case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
+			case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
+			case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
+			case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
+			case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
+			case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
+			case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
+			case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
+			case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
+			case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
+			case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
+			case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
+			case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
+			case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
+			case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
+			case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
+			case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
+			case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
+			case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
+			case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
+			case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
+			case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
+			case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
+			case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
+			case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
+			case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
+			case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
+			case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
+			case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
+			case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
+			case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
+			case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
+			case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
+			case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
+			case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
+			case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
+			case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
+			case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
+			case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
+			case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
+			case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
+			case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
+			case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
+			case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
+			case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
+			case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
+			case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
+			case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
+			case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
+			case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
+			case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
+			case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
+			case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
+			case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
+			case Shader::OPCODE_M4X4:       M4X4(r, d, s0, src1);                          break;
+			case Shader::OPCODE_M4X3:       M4X3(r, d, s0, src1);                          break;
+			case Shader::OPCODE_M3X4:       M3X4(r, d, s0, src1);                          break;
+			case Shader::OPCODE_M3X3:       M3X3(r, d, s0, src1);                          break;
+			case Shader::OPCODE_M3X2:       M3X2(r, d, s0, src1);                          break;
+			case Shader::OPCODE_TEX:        TEXLD(r, d, s0, src1, project, bias);          break;
+			case Shader::OPCODE_TEXLDD:     TEXLDD(r, d, s0, src1, s2, s3, project, bias); break;
+			case Shader::OPCODE_TEXLDL:     TEXLDL(r, d, s0, src1, project, bias);         break;
+			case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
+			case Shader::OPCODE_DISCARD:    DISCARD(r, cMask, instruction);                break;
+			case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
+			case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
+			case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
+			case Shader::OPCODE_BREAK:      BREAK(r);                                      break;
+			case Shader::OPCODE_BREAKC:     BREAKC(r, s0, s1, control);                    break;
+			case Shader::OPCODE_BREAKP:     BREAKP(r, src0);                               break;
+			case Shader::OPCODE_CONTINUE:   CONTINUE(r);                                   break;
+			case Shader::OPCODE_TEST:       TEST();                                        break;
+			case Shader::OPCODE_CALL:       CALL(r, dst.label, dst.callSite);              break;
+			case Shader::OPCODE_CALLNZ:     CALLNZ(r, dst.label, dst.callSite, src0);      break;
+			case Shader::OPCODE_ELSE:       ELSE(r);                                       break;
+			case Shader::OPCODE_ENDIF:      ENDIF(r);                                      break;
+			case Shader::OPCODE_ENDLOOP:    ENDLOOP(r);                                    break;
+			case Shader::OPCODE_ENDREP:     ENDREP(r);                                     break;
+			case Shader::OPCODE_ENDWHILE:   ENDWHILE(r);                                   break;
+			case Shader::OPCODE_IF:         IF(r, src0);                                   break;
+			case Shader::OPCODE_IFC:        IFC(r, s0, s1, control);                       break;
+			case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
+			case Shader::OPCODE_LOOP:       LOOP(r, src1);                                 break;
+			case Shader::OPCODE_REP:        REP(r, src0);                                  break;
+			case Shader::OPCODE_WHILE:      WHILE(r, src0);                                break;
+			case Shader::OPCODE_RET:        RET(r);                                        break;
+			case Shader::OPCODE_LEAVE:      LEAVE(r);                                      break;
+			case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
+			case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
+			case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
+			case Shader::OPCODE_NOT:        not(d, s0);                                    break;
+			case Shader::OPCODE_OR:         or(d.x, s0.x, s1.x);                           break;
+			case Shader::OPCODE_XOR:        xor(d.x, s0.x, s1.x);                          break;
+			case Shader::OPCODE_AND:        and(d.x, s0.x, s1.x);                          break;
+			case Shader::OPCODE_END:                                                       break;
+			default:
+				ASSERT(false);
+			}
+
+			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
+			{
+				if(dst.integer)
+				{
+					switch(opcode)
+					{
+					case Shader::OPCODE_DIV:
+						if(dst.x) d.x = Trunc(d.x);
+						if(dst.y) d.y = Trunc(d.y);
+						if(dst.z) d.z = Trunc(d.z);
+						if(dst.w) d.w = Trunc(d.w);
+						break;
+					default:
+						break;   // No truncation to integer required when arguments are integer
+					}
+				}
+
+				if(dst.saturate)
+				{
+					if(dst.x) d.x = Max(d.x, Float4(0.0f));
+					if(dst.y) d.y = Max(d.y, Float4(0.0f));
+					if(dst.z) d.z = Max(d.z, Float4(0.0f));
+					if(dst.w) d.w = Max(d.w, Float4(0.0f));
+
+					if(dst.x) d.x = Min(d.x, Float4(1.0f));
+					if(dst.y) d.y = Min(d.y, Float4(1.0f));
+					if(dst.z) d.z = Min(d.z, Float4(1.0f));
+					if(dst.w) d.w = Min(d.w, Float4(1.0f));
+				}
+
+				if(instruction->isPredicated())
+				{
+					Vector4f pDst;   // FIXME: Rename
+
+					switch(dst.type)
+					{
+					case Shader::PARAMETER_TEMP:
+						if(dst.rel.type == Shader::PARAMETER_VOID)
+						{
+							if(dst.x) pDst.x = r.rf[dst.index].x;
+							if(dst.y) pDst.y = r.rf[dst.index].y;
+							if(dst.z) pDst.z = r.rf[dst.index].z;
+							if(dst.w) pDst.w = r.rf[dst.index].w;
+						}
+						else
+						{
+							Int a = relativeAddress(r, dst);
+
+							if(dst.x) pDst.x = r.rf[dst.index + a].x;
+							if(dst.y) pDst.y = r.rf[dst.index + a].y;
+							if(dst.z) pDst.z = r.rf[dst.index + a].z;
+							if(dst.w) pDst.w = r.rf[dst.index + a].w;
+						}
+						break;
+					case Shader::PARAMETER_COLOROUT:
+						ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
+						if(dst.x) pDst.x = r.oC[dst.index].x;
+						if(dst.y) pDst.y = r.oC[dst.index].y;
+						if(dst.z) pDst.z = r.oC[dst.index].z;
+						if(dst.w) pDst.w = r.oC[dst.index].w;
+						break;
+					case Shader::PARAMETER_PREDICATE:
+						if(dst.x) pDst.x = r.p0.x;
+						if(dst.y) pDst.y = r.p0.y;
+						if(dst.z) pDst.z = r.p0.z;
+						if(dst.w) pDst.w = r.p0.w;
+						break;
+					case Shader::PARAMETER_DEPTHOUT:
+						pDst.x = r.oDepth;
+						break;
+					default:
+						ASSERT(false);
+					}
+
+					Int4 enable = enableMask(r, instruction);
+
+					Int4 xEnable = enable;
+					Int4 yEnable = enable;
+					Int4 zEnable = enable;
+					Int4 wEnable = enable;
+
+					if(predicate)
+					{
+						unsigned char pSwizzle = instruction->predicateSwizzle;
+
+						Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
+						Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
+						Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
+						Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
+
+						if(!instruction->predicateNot)
+						{
+							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
+							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
+							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
+							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
+						}
+						else
+						{
+							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
+							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
+							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
+							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
+						}
+					}
+
+					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
+					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
+					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
+					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
+
+					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
+					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
+					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
+					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
+				}
+
+				switch(dst.type)
+				{
+				case Shader::PARAMETER_TEMP:
+					if(dst.rel.type == Shader::PARAMETER_VOID)
+					{
+						if(dst.x) r.rf[dst.index].x = d.x;
+						if(dst.y) r.rf[dst.index].y = d.y;
+						if(dst.z) r.rf[dst.index].z = d.z;
+						if(dst.w) r.rf[dst.index].w = d.w;
+					}
+					else
+					{
+						Int a = relativeAddress(r, dst);
+
+						if(dst.x) r.rf[dst.index + a].x = d.x;
+						if(dst.y) r.rf[dst.index + a].y = d.y;
+						if(dst.z) r.rf[dst.index + a].z = d.z;
+						if(dst.w) r.rf[dst.index + a].w = d.w;
+					}
+					break;
+				case Shader::PARAMETER_COLOROUT:
+					ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
+					if(dst.x) { r.oC[dst.index].x = d.x; out[dst.index][0] = true; }
+					if(dst.y) { r.oC[dst.index].y = d.y; out[dst.index][1] = true; }
+					if(dst.z) { r.oC[dst.index].z = d.z; out[dst.index][2] = true; }
+					if(dst.w) { r.oC[dst.index].w = d.w; out[dst.index][3] = true; }
+					break;
+				case Shader::PARAMETER_PREDICATE:
+					if(dst.x) r.p0.x = d.x;
+					if(dst.y) r.p0.y = d.y;
+					if(dst.z) r.p0.z = d.z;
+					if(dst.w) r.p0.w = d.w;
+					break;
+				case Shader::PARAMETER_DEPTHOUT:
+					r.oDepth = d.x;
+					break;
+				default:
+					ASSERT(false);
+				}
+			}
+		}
+
+		if(currentLabel != -1)
+		{
+			Nucleus::setInsertBlock(returnBlock);
+		}
+
+		for(int i = 0; i < 4; i++)
+		{
+			if(state.targetFormat[i] != FORMAT_NULL)
+			{
+				if(!out[i][0]) r.oC[i].x = Float4(0.0f);
+				if(!out[i][1]) r.oC[i].y = Float4(0.0f);
+				if(!out[i][2]) r.oC[i].z = Float4(0.0f);
+				if(!out[i][3]) r.oC[i].w = Float4(0.0f);
+			}
+		}
+	}
+

+	Bool PixelProgram::alphaTest(PixelRoutine::Registers &rBase, Int cMask[4])   // Clamps the color outputs, applies alpha test or alpha-to-coverage to cMask, and returns whether any sample is still covered

+	{

+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast to this class's Registers type

+

+		clampColor(r.oC);   // Done even when the alpha test is inactive

+

+		if(!state.alphaTestActive())
+		{
+			return true;   // No alpha test: nothing is rejected here
+		}
+
+		Int aMask;
+
+		if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
+		{
+			Short4 alpha = RoundShort4(r.oC[0].w * Float4(0x1000));   // Alpha of color output 0, scaled by 0x1000 and rounded to 16-bit integers
+
+			PixelRoutine::alphaTest(r, aMask, alpha);   // Base-class overload fills aMask
+
+			for(unsigned int q = 0; q < state.multiSample; q++)
+			{
+				cMask[q] &= aMask;   // Same alpha mask applied to every sample
+			}
+		}
+		else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
+		{
+			alphaToCoverage(r, cMask, r.oC[0].w);
+		}
+		else ASSERT(false);
+
+		Int pass = cMask[0];   // OR of the coverage masks of all samples
+
+		for(unsigned int q = 1; q < state.multiSample; q++)
+		{
+			pass = pass | cMask[q];
+		}
+
+		return pass != 0x0;

+	}

+

+	void PixelProgram::rasterOperation(PixelRoutine::Registers &rBase, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])   // For each active color target: optional sRGB conversion, fog on target 0, then per-sample blend and write

+	{

+		Registers& r = *static_cast<Registers*>(&rBase);   // Downcast to this class's Registers type

+

+		for(int index = 0; index < 4; index++)
+		{
+			if(!state.colorWriteActive(index))
+			{
+				continue;
+			}
+
+			if(!postBlendSRGB && state.writeSRGB)
+			{
+				r.oC[index].x = linearToSRGB(r.oC[index].x);   // Only x, y and z are converted; w is left as is
+				r.oC[index].y = linearToSRGB(r.oC[index].y);
+				r.oC[index].z = linearToSRGB(r.oC[index].z);
+			}
+
+			if(index == 0)
+			{
+				fogBlend(r, r.oC[index], fog, r.z[0], r.rhw);   // Fog is only blended into color output 0
+			}
+
+			switch(state.targetFormat[index])
+			{
+			case FORMAT_R5G6B5:
+			case FORMAT_X8R8G8B8:
+			case FORMAT_X8B8G8R8:
+			case FORMAT_A8R8G8B8:
+			case FORMAT_A8B8G8R8:
+			case FORMAT_A8:
+			case FORMAT_G16R16:
+			case FORMAT_A16B16G16R16:
+				for(unsigned int q = 0; q < state.multiSample; q++)   // These formats go through a Vector4s color produced by convertFixed16
+				{
+					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData, colorSliceB[index]));   // Sample q's buffer: base pointer plus q times colorSliceB[index]
+					Vector4s color;
+
+					color.x = convertFixed16(r.oC[index].x, false);
+					color.y = convertFixed16(r.oC[index].y, false);
+					color.z = convertFixed16(r.oC[index].z, false);
+					color.w = convertFixed16(r.oC[index].w, false);
+
+					if(state.multiSampleMask & (1 << q))   // Skip samples not selected by the multisample mask
+					{
+						alphaBlend(r, index, buffer, color, x);
+						logicOperation(r, index, buffer, color, x);
+						writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+					}
+				}
+				break;
+			case FORMAT_R32F:
+			case FORMAT_G32R32F:
+			case FORMAT_A32B32G32R32F:
+				for(unsigned int q = 0; q < state.multiSample; q++)   // These formats are blended and written as Vector4f; no logic operation is applied
+				{
+					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData, colorSliceB[index]));
+					Vector4f color = r.oC[index];
+
+					if(state.multiSampleMask & (1 << q))
+					{
+						alphaBlend(r, index, buffer, color, x);
+						writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+					}
+				}
+				break;
+			default:
+				ASSERT(false);
+			}
+		}
+	}

+
+	void PixelProgram::sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)   // Resolves the sampler operand to a texture stage and samples it
+	{
+		const bool staticStage = (sampler.type == Shader::PARAMETER_SAMPLER) && (sampler.rel.type == Shader::PARAMETER_VOID);
+		if(!staticStage)   // Stage is only known at run time: emit one guarded sample per sampler the shader uses
+		{
+			Int stageIndex = As<Int>(Float(fetchRegisterF(r, sampler).x.x));
+
+			for(int stage = 0; stage < TEXTURE_IMAGE_UNITS; stage++)
+			{
+				if(!shader->usesSampler(stage)) continue;
+
+				If(stageIndex == stage)
+				{
+					sampleTexture(r, c, stage, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
+					// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
+				}
+			}
+		}
+		else   // Stage index is a compile-time constant
+		{
+			sampleTexture(r, c, sampler.index, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
+		}
+	}
+
+	void PixelProgram::sampleTexture(Registers &r, Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)   // Samples texture stage 'stage' into c, dividing u/v/w by q first when 'project' is set
+	{
+#if PERF_PROFILE
+		Long startTicks = Ticks();
+#endif
+
+		Pointer<Byte> mipmapBase = r.data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
+
+		if(project)
+		{
+			Float4 invQ = reciprocal(q);
+
+			Float4 uProjected = u * invQ;
+			Float4 vProjected = v * invQ;
+			Float4 wProjected = w * invQ;
+
+			sampler[stage]->sampleTexture(mipmapBase, c, uProjected, vProjected, wProjected, q, dsx, dsy, bias, gradients, lodProvided);
+		}
+		else
+		{
+			sampler[stage]->sampleTexture(mipmapBase, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
+		}
+
+#if PERF_PROFILE
+		r.cycles[PERF_TEX] += Ticks() - startTicks;
+#endif
+	}
+
+	void PixelProgram::clampColor(Vector4f oC[4])   // Clamps color outputs to [0, 1] for the listed non-float target formats; the F-suffixed formats and FORMAT_NULL are left untouched
+	{
+		for(int target = 0; target < 4; target++)
+		{
+			if(!(state.colorWriteActive(target) || (target == 0 && state.alphaTestActive())))   // Neither written nor (for output 0) needed by the alpha test
+			{
+				continue;
+			}
+
+			switch(state.targetFormat[target])
+			{
+			case FORMAT_NULL:
+				break;
+			case FORMAT_R5G6B5:
+			case FORMAT_A8R8G8B8:
+			case FORMAT_A8B8G8R8:
+			case FORMAT_X8R8G8B8:
+			case FORMAT_X8B8G8R8:
+			case FORMAT_A8:
+			case FORMAT_G16R16:
+			case FORMAT_A16B16G16R16:
+				oC[target].x = Min(Max(oC[target].x, Float4(0.0f)), Float4(1.0f));
+				oC[target].y = Min(Max(oC[target].y, Float4(0.0f)), Float4(1.0f));
+				oC[target].z = Min(Max(oC[target].z, Float4(0.0f)), Float4(1.0f));
+				oC[target].w = Min(Max(oC[target].w, Float4(0.0f)), Float4(1.0f));
+				break;
+			case FORMAT_R32F:
+			case FORMAT_G32R32F:
+			case FORMAT_A32B32G32R32F:
+				break;
+			default:
+				ASSERT(false);
+			}
+		}
+	}
+
+	Int4 PixelProgram::enableMask(Registers &r, const Shader::Instruction *instruction)   // Returns the mask of pixels for which 'instruction' should take effect
+	{
+		Int4 active = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);   // Top of the enable stack, or all pixels when analysisBranch is not set
+
+		if(!whileTest)   // The break/continue/leave masks are not applied while whileTest is set
+		{
+			if(shader->containsBreakInstruction() && instruction->analysisBreak)
+			{
+				active = active & r.enableBreak;
+			}
+
+			if(shader->containsContinueInstruction() && instruction->analysisContinue)
+			{
+				active = active & r.enableContinue;
+			}
+
+			if(shader->containsLeaveInstruction() && instruction->analysisLeave)
+			{
+				active = active & r.enableLeave;
+			}
+		}
+
+		return active;
+	}
+
+	Vector4f PixelProgram::fetchRegisterF(Registers &r, const Src &src, int offset)   // Reads source operand 'src' at register index src.index + offset, then applies its swizzle and modifier
+	{
+		Vector4f reg;
+		int i = src.index + offset;
+
+		switch(src.type)
+		{
+		case Shader::PARAMETER_TEMP:
+			if(src.rel.type == Shader::PARAMETER_VOID)
+			{
+				reg = r.rf[i];
+			}
+			else
+			{
+				Int a = relativeAddress(r, src);   // Run-time offset added to the static index
+
+				reg = r.rf[i + a];
+			}
+			break;
+		case Shader::PARAMETER_INPUT:
+			{
+				if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
+				{
+					reg = r.vf[i];
+				}
+				else if(src.rel.type == Shader::PARAMETER_LOOP)   // Relative to the current loop counter
+				{
+					Int aL = r.aL[r.loopDepth];
+
+					reg = r.vf[i + aL];
+				}
+				else
+				{
+					Int a = relativeAddress(r, src);
+
+					reg = r.vf[i + a];
+				}
+			}
+			break;
+		case Shader::PARAMETER_CONST:
+			reg = readConstant(r, src, offset);
+			break;
+		case Shader::PARAMETER_TEXTURE:
+			reg = r.vf[2 + i];   // Texture registers are read from the input registers starting at index 2
+			break;
+		case Shader::PARAMETER_MISCTYPE:
+			if(src.index == 0) reg = r.vPos;
+			if(src.index == 1) reg = r.vFace;
+			break;
+		case Shader::PARAMETER_SAMPLER:
+			if(src.rel.type == Shader::PARAMETER_VOID)
+			{
+				reg.x = As<Float4>(Int4(i));   // Sampler index stored as raw integer bits
+			}
+			else if(src.rel.type == Shader::PARAMETER_TEMP)
+			{
+				reg.x = As<Float4>(Int4(i) + RoundInt(r.rf[src.rel.index].x));
+			}
+			return reg;   // Returned without swizzle or modifier
+		case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
+		case Shader::PARAMETER_VOID:        return reg; // Dummy
+		case Shader::PARAMETER_FLOAT4LITERAL:
+			reg.x = Float4(src.value[0]);
+			reg.y = Float4(src.value[1]);
+			reg.z = Float4(src.value[2]);
+			reg.w = Float4(src.value[3]);
+			break;
+		case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
+		case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
+		case Shader::PARAMETER_LOOP:        return reg; // Dummy
+		case Shader::PARAMETER_COLOROUT:
+			reg = r.oC[i];
+			break;
+		case Shader::PARAMETER_DEPTHOUT:
+			reg.x = r.oDepth;
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];   // Two swizzle bits per result component select the source component
+		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
+		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
+		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
+
+		Vector4f mod;
+
+		switch(src.modifier)
+		{
+		case Shader::MODIFIER_NONE:
+			mod.x = x;
+			mod.y = y;
+			mod.z = z;
+			mod.w = w;
+			break;
+		case Shader::MODIFIER_NEGATE:
+			mod.x = -x;
+			mod.y = -y;
+			mod.z = -z;
+			mod.w = -w;
+			break;
+		case Shader::MODIFIER_ABS:
+			mod.x = Abs(x);
+			mod.y = Abs(y);
+			mod.z = Abs(z);
+			mod.w = Abs(w);
+			break;
+		case Shader::MODIFIER_ABS_NEGATE:
+			mod.x = -Abs(x);
+			mod.y = -Abs(y);
+			mod.z = -Abs(z);
+			mod.w = -Abs(w);
+			break;
+		case Shader::MODIFIER_NOT:
+			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));   // Bitwise complement of the raw bits
+			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
+			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
+			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
+			break;
+		default:
+			ASSERT(false);
+		}
+
+		return mod;
+	}
+
+	Vector4f PixelProgram::readConstant(Registers &r, const Src &src, int offset)   // Loads float constant ps.c[src.index + offset], optionally relatively addressed, with each component replicated across a Float4
+	{
+		Vector4f c;
+
+		int i = src.index + offset;
+
+		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
+		{
+			c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[i]));
+
+			c.x = c.x.xxxx;   // Replicate each component across all four elements
+			c.y = c.y.yyyy;
+			c.z = c.z.zzzz;
+			c.w = c.w.wwww;
+
+			if(shader->containsDefineInstruction())   // Constant may be known at compile time
+			{
+				for(size_t j = 0; j < shader->getLength(); j++)
+				{
+					const Shader::Instruction &instruction = *shader->getInstruction(j);
+
+					if(instruction.opcode == Shader::OPCODE_DEF)
+					{
+						if(instruction.dst.index == i)
+						{
+							c.x = Float4(instruction.src[0].value[0]);   // The first matching DEF overrides the value loaded above
+							c.y = Float4(instruction.src[0].value[1]);
+							c.z = Float4(instruction.src[0].value[2]);
+							c.w = Float4(instruction.src[0].value[3]);
+
+							break;
+						}
+					}
+				}
+			}
+		}
+		else if(src.rel.type == Shader::PARAMETER_LOOP)   // Relative to the current loop counter
+		{
+			Int loopCounter = r.aL[r.loopDepth];
+
+			c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[i]) + loopCounter * 16);   // Byte offset of 16 per register step; assumes ps.c entries are 16 bytes each - TODO confirm
+
+			c.x = c.x.xxxx;
+			c.y = c.y.yyyy;
+			c.z = c.z.zzzz;
+			c.w = c.w.wwww;
+		}
+		else   // Relative to another register, resolved by relativeAddress()
+		{
+			Int a = relativeAddress(r, src);
+
+			c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[i]) + a * 16);
+
+			c.x = c.x.xxxx;
+			c.y = c.y.yyyy;
+			c.z = c.z.zzzz;
+			c.w = c.w.wwww;
+		}
+
+		return c;
+	}
+
+	Int PixelProgram::relativeAddress(Registers &r, const Shader::Parameter &var)   // Returns the run-time index offset described by var.rel: the rounded x value of the addressing register times var.rel.scale
+	{
+		ASSERT(var.rel.deterministic);
+
+		if(var.rel.type == Shader::PARAMETER_TEMP)
+		{
+			return RoundInt(Extract(r.rf[var.rel.index].x, 0)) * var.rel.scale;   // Only element 0 of the Float4 is used
+		}
+		else if(var.rel.type == Shader::PARAMETER_INPUT)
+		{
+			return RoundInt(Extract(r.vf[var.rel.index].x, 0)) * var.rel.scale;
+		}
+		else if(var.rel.type == Shader::PARAMETER_OUTPUT)
+		{
+			return RoundInt(Extract(r.oC[var.rel.index].x, 0)) * var.rel.scale;   // Output registers are read from the color outputs
+		}
+		else if(var.rel.type == Shader::PARAMETER_CONST)
+		{
+			RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[var.rel.index]));
+
+			return RoundInt(Extract(c, 0)) * var.rel.scale;
+		}
+		else ASSERT(false);
+
+		return 0;   // Only reached for an unsupported relative register type
+	}
+
+	Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2) as 1.14 * sqrt(x) - 0.14 * x, clamped to [0, 1]
+	{
+		Float4 root = Rcp_pp(RcpSqrt_pp(x));   // Reciprocal of the reciprocal square root
+		Float4 approximation = root * Float4(1.14f) - x * Float4(0.14f);
+		Float4 lowerClamped = Max(approximation, Float4(0.0f));
+		return Min(lowerClamped, Float4(1.0f));
+	}
+
+	void PixelProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)   // dst.xy = dot3 of src0 with the two consecutive registers starting at src1
+	{
+		Vector4f rowX = fetchRegisterF(r, src1, 0);
+		Vector4f rowY = fetchRegisterF(r, src1, 1);
+
+		dst.x = dot3(src0, rowX);
+		dst.y = dot3(src0, rowY);
+	}
+
+	void PixelProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)   // dst.xyz = dot3 of src0 with the three consecutive registers starting at src1
+	{
+		Vector4f rowX = fetchRegisterF(r, src1, 0);
+		Vector4f rowY = fetchRegisterF(r, src1, 1);
+		Vector4f rowZ = fetchRegisterF(r, src1, 2);
+
+		dst.x = dot3(src0, rowX);
+		dst.y = dot3(src0, rowY);
+		dst.z = dot3(src0, rowZ);
+	}
+
+	void PixelProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)   // dst.xyzw = dot3 of src0 with the four consecutive registers starting at src1
+	{
+		Vector4f rowX = fetchRegisterF(r, src1, 0);
+		Vector4f rowY = fetchRegisterF(r, src1, 1);
+		Vector4f rowZ = fetchRegisterF(r, src1, 2);
+		Vector4f rowW = fetchRegisterF(r, src1, 3);
+
+		dst.x = dot3(src0, rowX);
+		dst.y = dot3(src0, rowY);
+		dst.z = dot3(src0, rowZ);
+		dst.w = dot3(src0, rowW);
+	}
+
+	void PixelProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)   // dst.xyz = dot4 of src0 with the three consecutive registers starting at src1
+	{
+		Vector4f rowX = fetchRegisterF(r, src1, 0);
+		Vector4f rowY = fetchRegisterF(r, src1, 1);
+		Vector4f rowZ = fetchRegisterF(r, src1, 2);
+
+		dst.x = dot4(src0, rowX);
+		dst.y = dot4(src0, rowY);
+		dst.z = dot4(src0, rowZ);
+	}
+
+	void PixelProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)   // dst.xyzw = dot4 of src0 with the four consecutive registers starting at src1
+	{
+		Vector4f rowX = fetchRegisterF(r, src1, 0);
+		Vector4f rowY = fetchRegisterF(r, src1, 1);
+		Vector4f rowZ = fetchRegisterF(r, src1, 2);
+		Vector4f rowW = fetchRegisterF(r, src1, 3);
+
+		dst.x = dot4(src0, rowX);
+		dst.y = dot4(src0, rowY);
+		dst.z = dot4(src0, rowZ);
+		dst.w = dot4(src0, rowW);
+	}
+
+	void PixelProgram::TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)   // Samples sampler src1 at coordinates src0 and writes the texel, swizzled by src1.swizzle, to dst
+	{
+		Vector4f texel;
+		sampleTexture(r, texel, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias);   // src0 also fills the dsx/dsy slots; the trailing arguments are left at their defaults
+
+		dst.x = texel[(src1.swizzle >> 0) & 0x3];
+		dst.y = texel[(src1.swizzle >> 2) & 0x3];
+		dst.z = texel[(src1.swizzle >> 4) & 0x3];
+		dst.w = texel[(src1.swizzle >> 6) & 0x3];
+	}
+
+	void PixelProgram::TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project, bool bias)   // Like TEXLD, but passes src2/src3 as dsx/dsy with the gradients argument set
+	{
+		Vector4f texel;
+		sampleTexture(r, texel, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, bias, true);
+
+		dst.x = texel[(src1.swizzle >> 0) & 0x3];
+		dst.y = texel[(src1.swizzle >> 2) & 0x3];
+		dst.z = texel[(src1.swizzle >> 4) & 0x3];
+		dst.w = texel[(src1.swizzle >> 6) & 0x3];
+	}
+
+	void PixelProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)   // Like TEXLD, but with the lodProvided argument set and gradients cleared
+	{
+		Vector4f texel;
+		sampleTexture(r, texel, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias, false, true);
+
+		dst.x = texel[(src1.swizzle >> 0) & 0x3];
+		dst.y = texel[(src1.swizzle >> 2) & 0x3];
+		dst.z = texel[(src1.swizzle >> 4) & 0x3];
+		dst.w = texel[(src1.swizzle >> 6) & 0x3];
+	}
+
+	void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)   // Clears coverage of pixels where any mask-selected component of src is less than zero
+	{
+		Int kill = -1;   // Despite the name, a set bit means the pixel is kept
+
+		if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));   // Keep pixels whose component is not less than zero
+		if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
+		if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
+		if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
+
+		// FIXME: Dynamic branching affects TEXKILL?
+		//	if(shader->containsDynamicBranching())
+		//	{
+		//		kill = ~SignMask(enableMask(r));
+		//	}
+
+		for(unsigned int q = 0; q < state.multiSample; q++)
+		{
+			cMask[q] &= kill;   // Same mask applied to every sample
+		}
+
+		// FIXME: Branch to end of shader if all killed?
+	}
+
+	void PixelProgram::DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction)   // Clears coverage of the pixels for which this instruction is enabled
+	{
+		Int kill = 0;   // Without dynamic branching every pixel is discarded
+
+		if(shader->containsDynamicBranching())
+		{
+			kill = ~SignMask(enableMask(r, instruction));   // Keep only the pixels that are not currently enabled
+		}
+
+		for(unsigned int q = 0; q < state.multiSample; q++)
+		{
+			cMask[q] &= kill;   // Same mask applied to every sample
+		}
+
+		// FIXME: Branch to end of shader if all killed?
+	}
+
+	void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)   // Per component: elements 0 and 1 get (y - x), elements 2 and 3 get (w - z)
+	{
+		dst.x = src.x.yyww - src.x.xxzz;   // NOTE(review): presumably x/y and z/w are horizontal neighbours in the pixel quad - confirm
+		dst.y = src.y.yyww - src.y.xxzz;
+		dst.z = src.z.yyww - src.z.xxzz;
+		dst.w = src.w.yyww - src.w.xxzz;
+	}
+
+	void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)   // Per component: elements 0 and 2 get (z - x), elements 1 and 3 get (w - y)
+	{
+		dst.x = src.x.zwzw - src.x.xyxy;   // NOTE(review): presumably x/z and y/w are vertical neighbours in the pixel quad - confirm
+		dst.y = src.y.zwzw - src.y.xyxy;
+		dst.z = src.z.zwzw - src.z.xyxy;
+		dst.w = src.w.zwzw - src.w.xyxy;
+	}
+
+	void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)   // Sum of the absolute values of the two differences computed by DFDX and DFDY
+	{
+		// abs(dFdx(src)) + abs(dFdy(src));
+		dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);   // Same swizzle expressions as DFDX and DFDY, written inline
+		dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
+		dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
+		dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
+	}
+
+	void PixelProgram::BREAK(Registers &r)   // Unconditional break towards the end block of the innermost loop/rep
+	{
+		llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();   // Receives whatever is emitted after the break
+		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];   // End block of the innermost enclosing loop/rep
+
+		if(breakDepth == 0)
+		{
+			r.enableIndex = r.enableIndex - breakDepth;   // breakDepth is 0 here, so enableIndex keeps its value
+			Nucleus::createBr(endBlock);
+		}
+		else
+		{
+			r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];   // Clear the break mask for the currently enabled pixels
+			Bool allBreak = SignMask(r.enableBreak) == 0x0;
+
+			r.enableIndex = r.enableIndex - breakDepth;   // Unwind the enable stack index before the possible jump to endBlock
+			branch(allBreak, endBlock, deadBlock);
+		}
+
+		Nucleus::setInsertBlock(deadBlock);
+		r.enableIndex = r.enableIndex + breakDepth;   // Restore the index for code that continues after the break
+	}
+
+	void PixelProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)   // Break for the pixels where comparing src0.x with src1.x under 'control' holds
+	{
+		Int4 condition;   // Left unassigned if 'control' matches none of the cases below
+
+		switch(control)
+		{
+		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;   // Only the x components are compared
+		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
+		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
+		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
+		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
+		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
+		default:
+			ASSERT(false);
+		}
+
+		BREAK(r, condition);
+	}
+
+	void PixelProgram::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
+	{
+		Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);   // Predicate component selected by the low two swizzle bits
+
+		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
+		{
+			condition = ~condition;   // Break where the predicate is false instead
+		}
+
+		BREAK(r, condition);
+	}
+
+	void PixelProgram::BREAK(Registers &r, Int4 &condition)   // Conditional break: clears the break mask for enabled pixels where 'condition' is set; 'condition' is modified in place
+	{
+		condition &= r.enableStack[r.enableIndex];   // Only currently enabled pixels can break
+
+		llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];   // End block of the innermost enclosing loop/rep
+
+		r.enableBreak = r.enableBreak & ~condition;
+		Bool allBreak = SignMask(r.enableBreak) == 0x0;   // True when no pixel has its break mask set any more
+
+		r.enableIndex = r.enableIndex - breakDepth;   // Unwind the enable stack index before the possible jump to endBlock
+		branch(allBreak, endBlock, continueBlock);
+
+		Nucleus::setInsertBlock(continueBlock);
+		r.enableIndex = r.enableIndex + breakDepth;   // Restore the index for code that continues after the break
+	}
+
+	void PixelProgram::CONTINUE(Registers &r)   // Clears the continue mask for the currently enabled pixels; no branch is emitted
+	{
+		r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
+	}
+
+	void PixelProgram::TEST()   // Sets whileTest, which makes enableMask() skip the break/continue/leave masks until ENDWHILE clears it
+	{
+		whileTest = true;
+	}
+
+	void PixelProgram::CALL(Registers &r, int labelIndex, int callSiteIndex)   // Emits an unconditional branch to the label's block and continues emission in this call site's return block
+	{
+		if(!labelBlock[labelIndex])
+		{
+			labelBlock[labelIndex] = Nucleus::createBasicBlock();   // Created on first reference to this label
+		}
+
+		if(callRetBlock[labelIndex].size() > 1)
+		{
+			r.callStack[r.stackIndex++] = UInt(callSiteIndex);   // Several call sites: record this one (presumably consumed by RET - confirm)
+		}
+
+		Int4 restoreLeave = r.enableLeave;   // Saved so enableLeave can be restored in the return block
+
+		Nucleus::createBr(labelBlock[labelIndex]);
+		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
+
+		r.enableLeave = restoreLeave;
+	}
+
+	void PixelProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)   // Dispatches a conditional call on the type of its condition operand
+	{
+		switch(src.type)
+		{
+		case Shader::PARAMETER_CONSTBOOL:
+			CALLNZb(r, labelIndex, callSiteIndex, src); break;
+		case Shader::PARAMETER_PREDICATE:
+			CALLNZp(r, labelIndex, callSiteIndex, src); break;
+		default:
+			ASSERT(false);   // Only boolean constants and the predicate register are supported
+		}
+	}
+
+	void PixelProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)   // Call taken when boolean constant ps.b[boolRegister.index] is non-zero (inverted by MODIFIER_NOT)
+	{
+		Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
+
+		if(boolRegister.modifier == Shader::MODIFIER_NOT)
+		{
+			condition = !condition;
+		}
+
+		if(!labelBlock[labelIndex])
+		{
+			labelBlock[labelIndex] = Nucleus::createBasicBlock();   // Created on first reference to this label
+		}
+
+		if(callRetBlock[labelIndex].size() > 1)
+		{
+			r.callStack[r.stackIndex++] = UInt(callSiteIndex);   // Pushed before the branch, so also when the call ends up not being taken
+		}
+
+		Int4 restoreLeave = r.enableLeave;   // Saved so enableLeave can be restored in the return block
+
+		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
+		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
+
+		r.enableLeave = restoreLeave;
+	}
+
+	void PixelProgram::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)   // Call taken when the predicate is set for at least one enabled pixel; the callee runs with that mask pushed on the enable stack
+	{
+		Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);   // Predicate component selected by the low two swizzle bits
+
+		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
+		{
+			condition = ~condition;
+		}
+
+		condition &= r.enableStack[r.enableIndex];   // Restrict to currently enabled pixels
+
+		if(!labelBlock[labelIndex])
+		{
+			labelBlock[labelIndex] = Nucleus::createBasicBlock();   // Created on first reference to this label
+		}
+
+		if(callRetBlock[labelIndex].size() > 1)
+		{
+			r.callStack[r.stackIndex++] = UInt(callSiteIndex);   // Pushed before the branch, so also when the call ends up not being taken
+		}
+
+		r.enableIndex++;   // Push the call condition as the new enable mask
+		r.enableStack[r.enableIndex] = condition;
+		Int4 restoreLeave = r.enableLeave;   // Saved so enableLeave can be restored in the return block
+
+		Bool notAllFalse = SignMask(condition) != 0;
+		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);   // Skip the call when no pixel satisfies the condition
+		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
+
+		r.enableIndex--;   // Pop the call condition
+		r.enableLeave = restoreLeave;
+	}
+
+	void PixelProgram::ELSE(Registers &r)   // Starts the else-part of the innermost open if
+	{
+		ifDepth--;   // Step back to index the innermost open if; restored at the end of this function
+
+		llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
+		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+
+		if(isConditionalIf[ifDepth])
+		{
+			Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];   // Pixels enabled one level down the stack that are not enabled at the current level
+			Bool notAllFalse = SignMask(condition) != 0;
+
+			branch(notAllFalse, falseBlock, endBlock);   // Skip the else-part when no pixel needs it
+
+			r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];   // Same expression as 'condition', stored as the new top of the enable stack
+		}
+		else
+		{
+			Nucleus::createBr(endBlock);
+			Nucleus::setInsertBlock(falseBlock);
+		}
+
+		ifFalseBlock[ifDepth] = endBlock;   // ENDIF reads this slot as its end block
+
+		ifDepth++;
+	}
+
+	void PixelProgram::ENDIF(Registers &r)   // Closes the innermost open if and continues emission in its end block
+	{
+		ifDepth--;
+
+		llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];   // The false block of the if, or the end block stored by ELSE
+
+		Nucleus::createBr(endBlock);
+		Nucleus::setInsertBlock(endBlock);
+
+		if(isConditionalIf[ifDepth])
+		{
+			breakDepth--;   // Only adjusted for ifs flagged in isConditionalIf
+			r.enableIndex--;   // Pop this if's enable mask
+		}
+	}
+
+	void PixelProgram::ENDLOOP(Registers &r)   // Closes a LOOP: steps aL, jumps back to the test block and continues emission in the end block
+	{
+		loopRepDepth--;   // Index of the blocks recorded by the matching LOOP
+
+		r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
+
+		llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
+		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+
+		Nucleus::createBr(testBlock);   // Back edge to the loop test
+		Nucleus::setInsertBlock(endBlock);
+
+		r.loopDepth--;
+		r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset the break mask to all bits set
+	}
+
+	void PixelProgram::ENDREP(Registers &r)   // Closes a REP: jumps back to the test block and continues emission in the end block (aL is not modified)
+	{
+		loopRepDepth--;   // Index of the blocks recorded by the matching REP
+
+		llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
+		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+
+		Nucleus::createBr(testBlock);   // Back edge to the loop test
+		Nucleus::setInsertBlock(endBlock);
+
+		r.loopDepth--;
+		r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset the break mask to all bits set
+	}
+
+	void PixelProgram::ENDWHILE(Registers &r)   // Closes a WHILE: jumps back to the test block and continues emission in the end block
+	{
+		loopRepDepth--;   // Index of the blocks recorded by the matching WHILE
+
+		llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
+		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+
+		Nucleus::createBr(testBlock);   // Back edge to the loop test
+		Nucleus::setInsertBlock(endBlock);
+
+		r.enableIndex--;   // Pops the enable stack entry pushed by WHILE
+		r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset the break mask to all bits set
+		whileTest = false;
+	}
+
+	void PixelProgram::IF(Registers &r, const Src &src)   // Dispatches an 'if' to the variant matching the source parameter type
+	{
+		if(src.type == Shader::PARAMETER_CONSTBOOL)   // Constant boolean register
+		{
+			IFb(r, src);
+		}
+		else if(src.type == Shader::PARAMETER_PREDICATE)   // Predicate register
+		{
+			IFp(r, src);
+		}
+		else
+		{
+			Int4 condition = As<Int4>(fetchRegisterF(r, src).x);   // Any other register: the x component reinterpreted as an integer mask
+			IF(r, condition);
+		}
+	}
+
+	void PixelProgram::IFb(Registers &r, const Src &boolRegister)   // Emits an 'if' on a constant boolean register; a single scalar branch, no enable stack entry
+	{
+		ASSERT(ifDepth < 24 + 4);   // NOTE(review): bound is 24 + 4 while ifFalseBlock/isConditionalIf are declared with 24 + 24 entries - confirm the intended limit
+
+		Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
+
+		if(boolRegister.modifier == Shader::MODIFIER_NOT)
+		{
+			condition = !condition;
+		}
+
+		llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
+
+		branch(condition, trueBlock, falseBlock);
+
+		isConditionalIf[ifDepth] = false;   // Tells ELSE/ENDIF not to touch the enable stack or breakDepth
+		ifFalseBlock[ifDepth] = falseBlock;
+
+		ifDepth++;
+	}
+
+	void PixelProgram::IFp(Registers &r, const Src &predicateRegister)   // Emits an 'if' conditioned on a component of predicate register p0
+	{
+		Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);   // Low two swizzle bits select the p0 component
+
+		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
+		{
+			condition = ~condition;
+		}
+
+		IF(r, condition);   // Shared masked-if emission
+	}
+
+	void PixelProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)   // Emits an 'if' on a comparison of the x components of src0 and src1
+	{
+		Int4 condition;
+
+		switch(control)   // Map the comparison control to the corresponding compare on the x components
+		{
+		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
+		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
+		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
+		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
+		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
+		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
+		default:
+			ASSERT(false);   // No other control value is handled; 'condition' is left unassigned on this path
+		}
+
+		IF(r, condition);   // Shared masked-if emission
+	}
+
+	void PixelProgram::IF(Registers &r, Int4 &condition)   // Emits a masked 'if': pushes the condition on the enable stack and branches on its sign mask
+	{
+		condition &= r.enableStack[r.enableIndex];   // AND with the mask on top of the enable stack (modifies the caller's variable)
+
+		r.enableIndex++;   // Push the combined mask
+		r.enableStack[r.enableIndex] = condition;
+
+		llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
+
+		Bool notAllFalse = SignMask(condition) != 0;   // True when the sign mask of the condition is non-zero
+
+		branch(notAllFalse, trueBlock, falseBlock);
+
+		isConditionalIf[ifDepth] = true;   // Tells ELSE/ENDIF to update the enable stack and breakDepth
+		ifFalseBlock[ifDepth] = falseBlock;
+
+		ifDepth++;
+		breakDepth++;
+	}
+
+	void PixelProgram::LABEL(int labelIndex)   // Starts emitting code for the subroutine labelIndex
+	{
+		if(!labelBlock[labelIndex])   // The block may already exist if a CALL variant referenced the label first
+		{
+			labelBlock[labelIndex] = Nucleus::createBasicBlock();
+		}
+
+		Nucleus::setInsertBlock(labelBlock[labelIndex]);
+		currentLabel = labelIndex;   // Read by RET to pick the return blocks for this label
+	}
+
+	void PixelProgram::LOOP(Registers &r, const Src &integerRegister)   // Opens a counted loop driven by the three components of an integer constant register
+	{
+		r.loopDepth++;
+
+		r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][0]));   // Component 0: iteration count
+		r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][1]));   // Component 1: initial aL value
+		r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][2]));   // Component 2: value ENDLOOP adds to aL
+
+		//	If(r.increment[r.loopDepth] == 0)
+		//	{
+		//		r.increment[r.loopDepth] = 1;
+		//	}
+
+		llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+
+		loopRepTestBlock[loopRepDepth] = testBlock;   // Recorded for the matching ENDLOOP
+		loopRepEndBlock[loopRepDepth] = endBlock;
+
+		// FIXME: jump(testBlock)
+		Nucleus::createBr(testBlock);
+		Nucleus::setInsertBlock(testBlock);
+
+		branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);   // Enter the body while iterations remain, otherwise go to the end block
+		Nucleus::setInsertBlock(loopBlock);
+
+		r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
+
+		loopRepDepth++;
+		breakDepth = 0;   // Restart the break depth count for the new loop body
+	}
+
+	void PixelProgram::REP(Registers &r, const Src &integerRegister)   // Opens a repeat block whose count comes from component 0 of an integer constant register
+	{
+		r.loopDepth++;
+
+		r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][0]));   // Component 0: iteration count
+		r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];   // aL is copied from the enclosing depth rather than loaded
+
+		llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+
+		loopRepTestBlock[loopRepDepth] = testBlock;   // Recorded for the matching ENDREP
+		loopRepEndBlock[loopRepDepth] = endBlock;
+
+		// FIXME: jump(testBlock)
+		Nucleus::createBr(testBlock);
+		Nucleus::setInsertBlock(testBlock);
+
+		branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);   // Enter the body while iterations remain, otherwise go to the end block
+		Nucleus::setInsertBlock(loopBlock);
+
+		r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
+
+		loopRepDepth++;
+		breakDepth = 0;   // Restart the break depth count for the new loop body
+	}
+
+	void PixelProgram::WHILE(Registers &r, const Src &temporaryRegister)   // Opens a masked while loop whose condition is the x component of temporaryRegister
+	{
+		r.enableIndex++;   // Reserve an enable stack entry for the loop condition; popped by ENDWHILE
+
+		llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
+		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+
+		loopRepTestBlock[loopRepDepth] = testBlock;   // Recorded for the matching ENDWHILE
+		loopRepEndBlock[loopRepDepth] = endBlock;
+
+		Int4 restoreBreak = r.enableBreak;   // Masks captured before entering the loop
+		Int4 restoreContinue = r.enableContinue;
+
+		// FIXME: jump(testBlock)
+		Nucleus::createBr(testBlock);
+		Nucleus::setInsertBlock(testBlock);
+		r.enableContinue = restoreContinue;   // Emitted in the test block: continue mask is restored on every pass through the test
+
+		const Vector4f &src = fetchRegisterF(r, temporaryRegister);
+		Int4 condition = As<Int4>(src.x);
+		condition &= r.enableStack[r.enableIndex - 1];   // Limit to the mask of the enclosing level
+		r.enableStack[r.enableIndex] = condition;
+
+		Bool notAllFalse = SignMask(condition) != 0;   // True when the sign mask of the condition is non-zero
+		branch(notAllFalse, loopBlock, endBlock);
+
+		Nucleus::setInsertBlock(endBlock);
+		r.enableBreak = restoreBreak;   // Emitted at the start of the end block: break mask is restored on loop exit
+
+		Nucleus::setInsertBlock(loopBlock);   // The loop body is emitted next
+
+		loopRepDepth++;
+		breakDepth = 0;   // Restart the break depth count for the new loop body
+	}
+
+	void PixelProgram::RET(Registers &r)   // Emits a return: ends the main code when no label is active, otherwise jumps back to the call site
+	{
+		if(currentLabel == -1)   // -1 is the constructor's initial value; LABEL overwrites it
+		{
+			returnBlock = Nucleus::createBasicBlock();
+			Nucleus::createBr(returnBlock);
+		}
+		else
+		{
+			llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();   // Switch default / target when the function has no callers
+
+			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
+			{
+				// FIXME: Encapsulate
+				UInt index = r.callStack[--r.stackIndex];
+
+				llvm::Value *value = index.loadValue();
+				llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
+
+				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)   // One switch case per call site index
+				{
+					Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
+				}
+			}
+			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
+			{
+				Nucleus::createBr(callRetBlock[currentLabel][0]);
+			}
+			else   // Function isn't called
+			{
+				Nucleus::createBr(unreachableBlock);
+			}
+
+			Nucleus::setInsertBlock(unreachableBlock);
+			Nucleus::createUnreachable();   // Terminates the block created above
+		}
+	}
+
+	void PixelProgram::LEAVE(Registers &r)   // Clears from enableLeave the bits that are set in the current enable mask
+	{
+		r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];   // Only the mask is updated here; no branch is emitted
+
+		// FIXME: Return from function if all instances left
+		// FIXME: Use enableLeave in other control-flow constructs
+	}
+

+}

diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp
new file mode 100644
index 0000000..276b11c
--- /dev/null
+++ b/src/Shader/PixelProgram.hpp
@@ -0,0 +1,152 @@
+// SwiftShader Software Renderer

+//

+// Copyright(c) 2015 Google Inc.

+//

+// All rights reserved. No part of this software may be copied, distributed, transmitted,

+// transcribed, stored in a retrieval system, translated into any human or computer

+// language by any means, or disclosed to third parties without the explicit written

+// agreement of Google Inc. Without such an agreement, no rights or licenses, express

+// or implied, including but not limited to any patent rights, are granted to you.

+//

+

+#ifndef sw_PixelProgram_hpp

+#define sw_PixelProgram_hpp

+

+#include "PixelRoutine.hpp"

+

+namespace sw

+{

+	class PixelProgram : public PixelRoutine   // PixelRoutine specialization carrying the shader instruction emitters and their control-flow bookkeeping

+	{

+	public:

+		PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :

+			PixelRoutine(state, shader), ifDepth(0), loopRepDepth(0), breakDepth(0), currentLabel(-1), whileTest(false)   // currentLabel == -1: no LABEL seen yet

+		{

+			for(int i = 0; i < 2048; ++i)   // No label has a basic block yet; LABEL and the CALL variants create them on demand
+			{
+				labelBlock[i] = 0;
+			}

+		}

+		virtual ~PixelProgram() {}

+	protected:

+		virtual void setBuiltins(PixelRoutine::Registers &r, Int &x, Int &y, Float4(&z)[4], Float4 &w);   // Hooks invoked from PixelRoutine::quad

+		virtual void applyShader(PixelRoutine::Registers &r, Int cMask[4]);

+		virtual Bool alphaTest(PixelRoutine::Registers &r, Int cMask[4]);

+		virtual void rasterOperation(PixelRoutine::Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);

+		virtual QuadRasterizer::Registers* createRegisters(const PixelShader *shader) { return new PixelProgram::Registers(shader); };   // Allocates with new; presumably the caller owns and deletes it - TODO confirm

+

+	private:

+		struct Registers : public PixelRoutine::Registers   // Register state used only by PixelProgram
+		{
+			Registers(const PixelShader *shader) : PixelRoutine::Registers(shader), loopDepth(-1)   // loopDepth starts at -1; LOOP/REP increment it before use
+			{
+				enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Bottom entry of the enable stack has all bits set
+
+				if(shader && shader->containsBreakInstruction())   // Break mask is only initialized when the shader contains a break
+				{
+					enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+				}
+
+				if(shader && shader->containsContinueInstruction())   // Continue mask is only initialized when the shader contains a continue
+				{
+					enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+				}
+			}
+
+			// Shader variables
+			Vector4f vPos;
+			Vector4f vFace;
+
+			// DX9 specific variables
+			Vector4f p0;   // Predicate register read by IFp/CALLNZp
+			Array<Int, 4> aL;   // Per loop depth; loaded by LOOP, advanced by ENDLOOP
+			Array<Int, 4> increment;   // Per loop depth; added to aL by ENDLOOP
+			Array<Int, 4> iteration;   // Per loop depth; remaining iteration count
+
+			Int loopDepth;    // FIXME: Add support for switch
+			Int stackIndex;   // FIXME: Inc/decrement callStack
+			Array<UInt, 16> callStack;   // Call site indices pushed by the CALL variants and popped by RET
+
+			// Per pixel based on conditions reached
+			Int enableIndex;   // Index of the top of enableStack; not set by this constructor
+			Array<Int4, 1 + 24> enableStack;
+			Int4 enableBreak;
+			Int4 enableContinue;
+			Int4 enableLeave;   // Not set by this constructor; LEAVE clears bits in it
+		};

+

+		void sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false);
+		void sampleTexture(Registers &r, Vector4f &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false);
+

+		// Raster operations
+		void clampColor(Vector4f oC[4]);

+

+		Int4 enableMask(Registers &r, const Shader::Instruction *instruction);

+

+		Vector4f fetchRegisterF(Registers &r, const Src &src, int offset = 0);

+		Vector4f readConstant(Registers &r, const Src &src, int offset = 0);

+		Int relativeAddress(Registers &r, const Shader::Parameter &var);
+
+		Float4 linearToSRGB(const Float4 &x);
+

+		// Instructions
+		typedef Shader::Control Control;
+
+		void M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
+		void M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
+		void M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
+		void M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
+		void M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
+		void TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias);
+		void TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project, bool bias);
+		void TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias);
+		void TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask);
+		void DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction);
+		void DFDX(Vector4f &dst, Vector4f &src);
+		void DFDY(Vector4f &dst, Vector4f &src);
+		void FWIDTH(Vector4f &dst, Vector4f &src);
+		void BREAK(Registers &r);
+		void BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control);
+		void BREAKP(Registers &r, const Src &predicateRegister);
+		void BREAK(Registers &r, Int4 &condition);
+		void CONTINUE(Registers &r);
+		void TEST();
+		void CALL(Registers &r, int labelIndex, int callSiteIndex);
+		void CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src);
+		void CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister);
+		void CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister);
+		void ELSE(Registers &r);
+		void ENDIF(Registers &r);
+		void ENDLOOP(Registers &r);
+		void ENDREP(Registers &r);
+		void ENDWHILE(Registers &r);
+		void IF(Registers &r, const Src &src);
+		void IFb(Registers &r, const Src &boolRegister);
+		void IFp(Registers &r, const Src &predicateRegister);
+		void IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control);
+		void IF(Registers &r, Int4 &condition);
+		void LABEL(int labelIndex);
+		void LOOP(Registers &r, const Src &integerRegister);
+		void REP(Registers &r, const Src &integerRegister);
+		void WHILE(Registers &r, const Src &temporaryRegister);
+		void RET(Registers &r);
+		void LEAVE(Registers &r);

+

+		int ifDepth;   // Number of currently open IF constructs; indexes ifFalseBlock/isConditionalIf
+		int loopRepDepth;   // Number of currently open LOOP/REP/WHILE constructs; indexes loopRepTestBlock/loopRepEndBlock
+		int breakDepth;   // Incremented by IF(r, condition), decremented by ENDIF, zeroed by LOOP/REP/WHILE
+		int currentLabel;   // Label set by the last LABEL, or -1 when none has been emitted
+		bool whileTest;   // Cleared by ENDWHILE

+
+		// FIXME: Get rid of llvm::
+		llvm::BasicBlock *ifFalseBlock[24 + 24];   // Per IF depth: false block, replaced by the end block in ELSE
+		llvm::BasicBlock *loopRepTestBlock[4];   // Per loop depth: block holding the loop test
+		llvm::BasicBlock *loopRepEndBlock[4];   // Per loop depth: block following the loop
+		llvm::BasicBlock *labelBlock[2048];   // Per label: entry block, 0 until created
+		std::vector<llvm::BasicBlock*> callRetBlock[2048];   // Per label: return blocks indexed by call site
+		llvm::BasicBlock *returnBlock;   // Created by RET when no label is active; not set by the constructor
+		bool isConditionalIf[24 + 24];   // Per IF depth: true for masked IF(r, condition), false for IFb

+	};

+}

+

+#endif

diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index ac27a4b..b53c031 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -25,28 +25,29 @@
 	extern bool complementaryDepthBuffer;
 	extern bool postBlendSRGB;
 	extern bool exactColorRounding;
-	extern bool booleanFaceRegister;
-	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
-	extern bool fullPixelPositionRegister;
+	extern bool forceClearRegisters;
 
-	PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : Rasterizer(state), shader(shader)
+	PixelRoutine::Registers::Registers(const PixelShader *shader) :   // Builds the register file shared by pixel routines; shader may be null
+		QuadRasterizer::Registers(),
+		rf(shader && shader->dynamicallyIndexedTemporaries),   // Flag is true only when a shader is present and it dynamically indexes temporaries
+		vf(shader && shader->dynamicallyIndexedInput)   // Flag is true only when a shader is present and it dynamically indexes inputs
 	{
-		perturbate = false;
-		luminance = false;
-		previousScaling = false;
-
-		ifDepth = 0;
-		loopRepDepth = 0;
-		breakDepth = 0;
-		currentLabel = -1;
-		whileTest = false;
-
-		for(int i = 0; i < 2048; i++)
+		if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)   // Zero the inputs when there is no shader, the version is below 0x0200, or clearing is forced
 		{
-			labelBlock[i] = 0;
+			for(int i = 0; i < 10; i++)   // Clears vf[0] through vf[9]
+			{
+				vf[i].x = Float4(0.0f);
+				vf[i].y = Float4(0.0f);
+				vf[i].z = Float4(0.0f);
+				vf[i].w = Float4(0.0f);
+			}
 		}
 	}
 
+	PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader)   // Only forwards state and shader to the QuadRasterizer base
+	{
+	}
+
 	PixelRoutine::~PixelRoutine()
 	{
 		for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
@@ -55,8 +56,10 @@
 		}
 	}
 
-	void PixelRoutine::quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
+	void PixelRoutine::quad(QuadRasterizer::Registers &rBase, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
 	{
+		Registers& r = *static_cast<Registers*>(&rBase);
+
 		#if PERF_PROFILE
 			Long pipeTime = Ticks();
 		#endif
@@ -67,7 +70,6 @@
 		}
 
 		const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
-		const bool integerPipeline = shaderVersion() <= 0x0104;
 
 		Int zMask[4];   // Depth mask
 		Int sMask[4];   // Stencil mask
@@ -167,11 +169,11 @@
 					{
 						if(!state.interpolant[interpolant].centroid)
 						{
-							r.vf[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive,V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
+							r.vf[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
 						}
 						else
 						{
-							r.vf[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
+							r.vf[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
 						}
 					}
 				}
@@ -205,51 +207,7 @@
 				f = interpolate(xxxx, r.Df, rhw, r.primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
 			}
 
-			if(integerPipeline)
-			{
-				if(state.color[0].component & 0x1) r.diffuse.x = convertFixed12(r.vf[0].x); else r.diffuse.x = Short4(0x1000);
-				if(state.color[0].component & 0x2) r.diffuse.y = convertFixed12(r.vf[0].y); else r.diffuse.y = Short4(0x1000);
-				if(state.color[0].component & 0x4) r.diffuse.z = convertFixed12(r.vf[0].z); else r.diffuse.z = Short4(0x1000);
-				if(state.color[0].component & 0x8) r.diffuse.w = convertFixed12(r.vf[0].w); else r.diffuse.w = Short4(0x1000);
-
-				if(state.color[1].component & 0x1) r.specular.x = convertFixed12(r.vf[1].x); else r.specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-				if(state.color[1].component & 0x2) r.specular.y = convertFixed12(r.vf[1].y); else r.specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-				if(state.color[1].component & 0x4) r.specular.z = convertFixed12(r.vf[1].z); else r.specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-				if(state.color[1].component & 0x8) r.specular.w = convertFixed12(r.vf[1].w); else r.specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-			}
-			else if(shaderVersion() >= 0x0300)
-			{
-				if(shader->vPosDeclared)
-				{
-					if(!halfIntegerCoordinates)
-					{
-						r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
-						r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
-					}
-					else
-					{
-						r.vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
-						r.vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
-					}
-
-					if(fullPixelPositionRegister)
-					{
-						r.vPos.z = z[0];   // FIXME: Centroid?
-						r.vPos.w = w;      // FIXME: Centroid?
-					}
-				}
-
-				if(shader->vFaceDeclared)
-				{
-					Float4 area = *Pointer<Float>(r.primitive + OFFSET(Primitive,area));
-					Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
-
-					r.vFace.x = face;
-					r.vFace.y = face;
-					r.vFace.z = face;
-					r.vFace.w = face;
-				}
-			}
+			setBuiltins(r, x, y, z, w);
 
 			#if PERF_PROFILE
 				r.cycles[PERF_INTERP] += Ticks() - interpTime;
@@ -263,63 +221,13 @@
 					Long shaderTime = Ticks();
 				#endif
 
-				if(shader)
-				{
-				//	shader->print("PixelShader-%0.8X.txt", state.shaderID);
-
-					if(shader->getVersion() <= 0x0104)
-					{
-						ps_1_x(r, cMask);
-					}
-					else
-					{
-						ps_2_x(r, cMask);
-					}
-				}
-				else
-				{
-					r.current = r.diffuse;
-					Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
-
-					for(int stage = 0; stage < 8; stage++)
-					{
-						if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
-						{
-							break;
-						}
-
-						Vector4s texture;
-
-						if(state.textureStage[stage].usesTexture)
-						{
-							sampleTexture(r, texture, stage, stage);
-						}
-
-						blendTexture(r, temp, texture, stage);
-					}
-
-					specularPixel(r.current, r.specular);
-				}
+				applyShader(r, cMask);
 
 				#if PERF_PROFILE
 					r.cycles[PERF_SHADER] += Ticks() - shaderTime;
 				#endif
 
-				if(integerPipeline)
-				{
-					r.current.x = Min(r.current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.x = Max(r.current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-					r.current.y = Min(r.current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.y = Max(r.current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-					r.current.z = Min(r.current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.z = Max(r.current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-					r.current.w = Min(r.current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.w = Max(r.current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-
-					alphaPass = alphaTest(r, cMask, r.current);
-				}
-				else
-				{
-					clampColor(r.oC);
-
-					alphaPass = alphaTest(r, cMask, r.oC[0]);
-				}
+				alphaPass = alphaTest(r, cMask);
 
 				if((shader && shader->containsKill()) || state.alphaTestActive())
 				{
@@ -366,14 +274,7 @@
 							AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
 						#endif
 
-						if(integerPipeline)
-						{
-							rasterOperation(r.current, r, f, cBuffer[0], x, sMask, zMask, cMask);
-						}
-						else
-						{
-							rasterOperation(r.oC, r, f, cBuffer, x, sMask, zMask, cMask);
-						}
+						rasterOperation(r, f, cBuffer, x, sMask, zMask, cMask);
 					}
 				}
 
@@ -396,23 +297,6 @@
 		#endif
 	}
 
-	Float4 PixelRoutine::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
-	{
-		Float4 interpolant = D;
-
-		if(!flat)
-		{
-			interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16);
-
-			if(perspective)
-			{
-				interpolant *= rhw;
-			}
-		}
-
-		return interpolant;
-	}
-
 	Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
 	{
 		Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
@@ -656,912 +540,6 @@
 		return zMask != 0;
 	}
 
-	void PixelRoutine::blendTexture(Registers &r, Vector4s &temp, Vector4s &texture, int stage)
-	{
-		Vector4s *arg1;
-		Vector4s *arg2;
-		Vector4s *arg3;
-		Vector4s res;
-
-		Vector4s constant;
-		Vector4s tfactor;
-
-		const TextureStage::State &textureStage = state.textureStage[stage];
-
-		if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
-		   textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
-		   textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
-		   textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
-		   textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
-		   textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
-		{
-			constant.x = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[0]));
-			constant.y = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[1]));
-			constant.z = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[2]));
-			constant.w = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[3]));
-		}
-
-		if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
-		   textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
-		   textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
-		   textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
-		   textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
-		   textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
-		{
-			tfactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[0]));
-			tfactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[1]));
-			tfactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[2]));
-			tfactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]));
-		}
-
-		// Premodulate
-		if(stage > 0 && textureStage.usesTexture)
-		{
-			if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
-			{
-				r.current.x = MulHigh(r.current.x, texture.x) << 4;
-				r.current.y = MulHigh(r.current.y, texture.y) << 4;
-				r.current.z = MulHigh(r.current.z, texture.z) << 4;
-			}
-
-			if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
-			{
-				r.current.w = MulHigh(r.current.w, texture.w) << 4;
-			}
-		}
-
-		if(luminance)
-		{
-			texture.x = MulHigh(texture.x, r.L) << 4;
-			texture.y = MulHigh(texture.y, r.L) << 4;
-			texture.z = MulHigh(texture.z, r.L) << 4;
-
-			luminance = false;
-		}
-
-		switch(textureStage.firstArgument)
-		{
-		case TextureStage::SOURCE_TEXTURE:	arg1 = &texture;		break;
-		case TextureStage::SOURCE_CONSTANT:	arg1 = &constant;		break;
-		case TextureStage::SOURCE_CURRENT:	arg1 = &r.current;		break;
-		case TextureStage::SOURCE_DIFFUSE:	arg1 = &r.diffuse;		break;
-		case TextureStage::SOURCE_SPECULAR:	arg1 = &r.specular;		break;
-		case TextureStage::SOURCE_TEMP:		arg1 = &temp;			break;
-		case TextureStage::SOURCE_TFACTOR:	arg1 = &tfactor;		break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.secondArgument)
-		{
-		case TextureStage::SOURCE_TEXTURE:	arg2 = &texture;		break;
-		case TextureStage::SOURCE_CONSTANT:	arg2 = &constant;		break;
-		case TextureStage::SOURCE_CURRENT:	arg2 = &r.current;		break;
-		case TextureStage::SOURCE_DIFFUSE:	arg2 = &r.diffuse;		break;
-		case TextureStage::SOURCE_SPECULAR:	arg2 = &r.specular;		break;
-		case TextureStage::SOURCE_TEMP:		arg2 = &temp;			break;
-		case TextureStage::SOURCE_TFACTOR:	arg2 = &tfactor;		break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.thirdArgument)
-		{
-		case TextureStage::SOURCE_TEXTURE:	arg3 = &texture;		break;
-		case TextureStage::SOURCE_CONSTANT:	arg3 = &constant;		break;
-		case TextureStage::SOURCE_CURRENT:	arg3 = &r.current;		break;
-		case TextureStage::SOURCE_DIFFUSE:	arg3 = &r.diffuse;		break;
-		case TextureStage::SOURCE_SPECULAR:	arg3 = &r.specular;		break;
-		case TextureStage::SOURCE_TEMP:		arg3 = &temp;			break;
-		case TextureStage::SOURCE_TFACTOR:	arg3 = &tfactor;		break;
-		default:
-			ASSERT(false);
-		}
-
-		Vector4s mod1;
-		Vector4s mod2;
-		Vector4s mod3;
-
-		switch(textureStage.firstModifier)
-		{
-		case TextureStage::MODIFIER_COLOR:
-			break;
-		case TextureStage::MODIFIER_INVCOLOR:
-			{
-				mod1.x = SubSat(Short4(0x1000), arg1->x);
-				mod1.y = SubSat(Short4(0x1000), arg1->y);
-				mod1.z = SubSat(Short4(0x1000), arg1->z);
-				mod1.w = SubSat(Short4(0x1000), arg1->w);
-
-				arg1 = &mod1;
-			}
-			break;
-		case TextureStage::MODIFIER_ALPHA:
-			{
-				mod1.x = arg1->w;
-				mod1.y = arg1->w;
-				mod1.z = arg1->w;
-				mod1.w = arg1->w;
-
-				arg1 = &mod1;
-			}
-			break;
-		case TextureStage::MODIFIER_INVALPHA:
-			{
-				mod1.x = SubSat(Short4(0x1000), arg1->w);
-				mod1.y = SubSat(Short4(0x1000), arg1->w);
-				mod1.z = SubSat(Short4(0x1000), arg1->w);
-				mod1.w = SubSat(Short4(0x1000), arg1->w);
-
-				arg1 = &mod1;
-			}
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.secondModifier)
-		{
-		case TextureStage::MODIFIER_COLOR:
-			break;
-		case TextureStage::MODIFIER_INVCOLOR:
-			{
-				mod2.x = SubSat(Short4(0x1000), arg2->x);
-				mod2.y = SubSat(Short4(0x1000), arg2->y);
-				mod2.z = SubSat(Short4(0x1000), arg2->z);
-				mod2.w = SubSat(Short4(0x1000), arg2->w);
-
-				arg2 = &mod2;
-			}
-			break;
-		case TextureStage::MODIFIER_ALPHA:
-			{
-				mod2.x = arg2->w;
-				mod2.y = arg2->w;
-				mod2.z = arg2->w;
-				mod2.w = arg2->w;
-
-				arg2 = &mod2;
-			}
-			break;
-		case TextureStage::MODIFIER_INVALPHA:
-			{
-				mod2.x = SubSat(Short4(0x1000), arg2->w);
-				mod2.y = SubSat(Short4(0x1000), arg2->w);
-				mod2.z = SubSat(Short4(0x1000), arg2->w);
-				mod2.w = SubSat(Short4(0x1000), arg2->w);
-
-				arg2 = &mod2;
-			}
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.thirdModifier)
-		{
-		case TextureStage::MODIFIER_COLOR:
-			break;
-		case TextureStage::MODIFIER_INVCOLOR:
-			{
-				mod3.x = SubSat(Short4(0x1000), arg3->x);
-				mod3.y = SubSat(Short4(0x1000), arg3->y);
-				mod3.z = SubSat(Short4(0x1000), arg3->z);
-				mod3.w = SubSat(Short4(0x1000), arg3->w);
-
-				arg3 = &mod3;
-			}
-			break;
-		case TextureStage::MODIFIER_ALPHA:
-			{
-				mod3.x = arg3->w;
-				mod3.y = arg3->w;
-				mod3.z = arg3->w;
-				mod3.w = arg3->w;
-
-				arg3 = &mod3;
-			}
-			break;
-		case TextureStage::MODIFIER_INVALPHA:
-			{
-				mod3.x = SubSat(Short4(0x1000), arg3->w);
-				mod3.y = SubSat(Short4(0x1000), arg3->w);
-				mod3.z = SubSat(Short4(0x1000), arg3->w);
-				mod3.w = SubSat(Short4(0x1000), arg3->w);
-
-				arg3 = &mod3;
-			}
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.stageOperation)
-		{
-		case TextureStage::STAGE_DISABLE:
-			break;
-		case TextureStage::STAGE_SELECTARG1:					// Arg1
-			{
-				res.x = arg1->x;
-				res.y = arg1->y;
-				res.z = arg1->z;
-			}
-			break;
-		case TextureStage::STAGE_SELECTARG2:					// Arg2
-			{
-				res.x = arg2->x;
-				res.y = arg2->y;
-				res.z = arg2->z;
-			}
-			break;
-		case TextureStage::STAGE_SELECTARG3:					// Arg3
-			{
-				res.x = arg3->x;
-				res.y = arg3->y;
-				res.z = arg3->z;
-			}
-			break;
-		case TextureStage::STAGE_MODULATE:					// Arg1 * Arg2
-			{
-				res.x = MulHigh(arg1->x, arg2->x) << 4;
-				res.y = MulHigh(arg1->y, arg2->y) << 4;
-				res.z = MulHigh(arg1->z, arg2->z) << 4;
-			}
-			break;
-		case TextureStage::STAGE_MODULATE2X:					// Arg1 * Arg2 * 2
-			{
-				res.x = MulHigh(arg1->x, arg2->x) << 5;
-				res.y = MulHigh(arg1->y, arg2->y) << 5;
-				res.z = MulHigh(arg1->z, arg2->z) << 5;
-			}
-			break;
-		case TextureStage::STAGE_MODULATE4X:					// Arg1 * Arg2 * 4
-			{
-				res.x = MulHigh(arg1->x, arg2->x) << 6;
-				res.y = MulHigh(arg1->y, arg2->y) << 6;
-				res.z = MulHigh(arg1->z, arg2->z) << 6;
-			}
-			break;
-		case TextureStage::STAGE_ADD:						// Arg1 + Arg2
-			{
-				res.x = AddSat(arg1->x, arg2->x);
-				res.y = AddSat(arg1->y, arg2->y);
-				res.z = AddSat(arg1->z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_ADDSIGNED:					// Arg1 + Arg2 - 0.5
-			{
-				res.x = AddSat(arg1->x, arg2->x);
-				res.y = AddSat(arg1->y, arg2->y);
-				res.z = AddSat(arg1->z, arg2->z);
-
-				res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-				res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-				res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			}
-			break;
-		case TextureStage::STAGE_ADDSIGNED2X:				// (Arg1 + Arg2 - 0.5) << 1
-			{
-				res.x = AddSat(arg1->x, arg2->x);
-				res.y = AddSat(arg1->y, arg2->y);
-				res.z = AddSat(arg1->z, arg2->z);
-
-				res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-				res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-				res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-
-				res.x = AddSat(res.x, res.x);
-				res.y = AddSat(res.y, res.y);
-				res.z = AddSat(res.z, res.z);
-			}
-			break;
-		case TextureStage::STAGE_SUBTRACT:					// Arg1 - Arg2
-			{
-				res.x = SubSat(arg1->x, arg2->x);
-				res.y = SubSat(arg1->y, arg2->y);
-				res.z = SubSat(arg1->z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_ADDSMOOTH:					// Arg1 + Arg2 - Arg1 * Arg2
-			{
-				Short4 tmp;
-
-				tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
-				tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
-				tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
-			}
-			break;
-		case TextureStage::STAGE_MULTIPLYADD:				// Arg3 + Arg1 * Arg2
-			{
-				res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
-				res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
-				res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
-			}
-			break;
-		case TextureStage::STAGE_LERP:						// Arg3 * (Arg1 - Arg2) + Arg2
-			{
-				res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
-				res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
-				res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_DOT3:						// 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
-			{
-				Short4 tmp;
-
-				res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp);
-				res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp);
-				res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp);
-
-				res.x = res.x << 6;
-				res.y = res.y << 6;
-				res.z = res.z << 6;
-
-				res.x = AddSat(res.x, res.y);
-				res.x = AddSat(res.x, res.z);
-
-				// Clamp to [0, 1]
-				res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-				res.x = Min(res.x, Short4(0x1000));
-
-				res.y = res.x;
-				res.z = res.x;
-				res.w = res.x;
-			}
-			break;
-		case TextureStage::STAGE_BLENDCURRENTALPHA:			// Alpha * (Arg1 - Arg2) + Arg2
-			{
-				res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.current.w) << 4; res.x = AddSat(res.x, arg2->x);
-				res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.current.w) << 4; res.y = AddSat(res.y, arg2->y);
-				res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.current.w) << 4; res.z = AddSat(res.z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_BLENDDIFFUSEALPHA:			// Alpha * (Arg1 - Arg2) + Arg2
-			{
-				res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
-				res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
-				res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_BLENDFACTORALPHA:			// Alpha * (Arg1 - Arg2) + Arg2
-			{
-				res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
-				res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
-				res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_BLENDTEXTUREALPHA:			// Alpha * (Arg1 - Arg2) + Arg2
-			{
-				res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
-				res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
-				res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
-			}
-			break;
-		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:		// Arg1 + Arg2 * (1 - Alpha)
-			{
-				res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
-				res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
-				res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
-			}
-			break;
-		case TextureStage::STAGE_PREMODULATE:
-			{
-				res.x = arg1->x;
-				res.y = arg1->y;
-				res.z = arg1->z;
-			}
-			break;
-		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:		// Arg1 + Arg1.w * Arg2
-			{
-				res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
-				res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
-				res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
-			}
-			break;
-		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:		// Arg1 * Arg2 + Arg1.w
-			{
-				res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
-				res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
-				res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
-			}
-			break;
-		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:	// (1 - Arg1.w) * Arg2 + Arg1
-			{
-				Short4 tmp;
-
-				res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
-				res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
-				res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
-			}
-			break;
-		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:	// (1 - Arg1) * Arg2 + Arg1.w
-			{
-				Short4 tmp;
-
-				res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
-				res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
-				res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
-			}
-			break;
-		case TextureStage::STAGE_BUMPENVMAP:
-			{
-				r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
-				r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
-			
-				Float4 du2;
-				Float4 dv2;
-
-				du2 = r.du;
-				dv2 = r.dv;
-				r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
-				dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
-				r.du += dv2;
-				r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
-				du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
-				r.dv += du2;
-
-				perturbate = true;
-
-				res.x = r.current.x;
-				res.y = r.current.y;
-				res.z = r.current.z;
-				res.w = r.current.w;
-			}
-			break;
-		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
-			{
-				r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
-				r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
-			
-				Float4 du2;
-				Float4 dv2;
-
-				du2 = r.du;
-				dv2 = r.dv;
-
-				r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
-				dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
-				r.du += dv2;
-				r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
-				du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
-				r.dv += du2;
-
-				perturbate = true;
-
-				r.L = texture.z;
-				r.L = MulHigh(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4)));
-				r.L = r.L << 4;
-				r.L = AddSat(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4)));
-				r.L = Max(r.L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-				r.L = Min(r.L, Short4(0x1000));
-
-				luminance = true;
-
-				res.x = r.current.x;
-				res.y = r.current.y;
-				res.z = r.current.z;
-				res.w = r.current.w;
-			}
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
-		{
-			switch(textureStage.firstArgumentAlpha)
-			{
-			case TextureStage::SOURCE_TEXTURE:	arg1 = &texture;		break;
-			case TextureStage::SOURCE_CONSTANT:	arg1 = &constant;		break;
-			case TextureStage::SOURCE_CURRENT:	arg1 = &r.current;		break;
-			case TextureStage::SOURCE_DIFFUSE:	arg1 = &r.diffuse;		break;
-			case TextureStage::SOURCE_SPECULAR:	arg1 = &r.specular;		break;
-			case TextureStage::SOURCE_TEMP:		arg1 = &temp;			break;
-			case TextureStage::SOURCE_TFACTOR:	arg1 = &tfactor;		break;
-			default:
-				ASSERT(false);
-			}
-
-			switch(textureStage.secondArgumentAlpha)
-			{
-			case TextureStage::SOURCE_TEXTURE:	arg2 = &texture;		break;
-			case TextureStage::SOURCE_CONSTANT:	arg2 = &constant;		break;
-			case TextureStage::SOURCE_CURRENT:	arg2 = &r.current;		break;
-			case TextureStage::SOURCE_DIFFUSE:	arg2 = &r.diffuse;		break;
-			case TextureStage::SOURCE_SPECULAR:	arg2 = &r.specular;		break;
-			case TextureStage::SOURCE_TEMP:		arg2 = &temp;			break;
-			case TextureStage::SOURCE_TFACTOR:	arg2 = &tfactor;		break;
-			default:
-				ASSERT(false);
-			}
-
-			switch(textureStage.thirdArgumentAlpha)
-			{
-			case TextureStage::SOURCE_TEXTURE:	arg3 = &texture;		break;
-			case TextureStage::SOURCE_CONSTANT:	arg3 = &constant;		break;
-			case TextureStage::SOURCE_CURRENT:	arg3 = &r.current;		break;
-			case TextureStage::SOURCE_DIFFUSE:	arg3 = &r.diffuse;		break;
-			case TextureStage::SOURCE_SPECULAR:	arg3 = &r.specular;		break;
-			case TextureStage::SOURCE_TEMP:		arg3 = &temp;			break;
-			case TextureStage::SOURCE_TFACTOR:	arg3 = &tfactor;		break;
-			default:
-				ASSERT(false);
-			}
-
-			switch(textureStage.firstModifierAlpha)   // FIXME: Check if actually used
-			{
-			case TextureStage::MODIFIER_COLOR:
-				break;
-			case TextureStage::MODIFIER_INVCOLOR:
-				{
-					mod1.w = SubSat(Short4(0x1000), arg1->w);
-
-					arg1 = &mod1;
-				}
-				break;
-			case TextureStage::MODIFIER_ALPHA:
-				{
-					// Redudant
-				}
-				break;
-			case TextureStage::MODIFIER_INVALPHA:
-				{
-					mod1.w = SubSat(Short4(0x1000), arg1->w);
-
-					arg1 = &mod1;
-				}
-				break;
-			default:
-				ASSERT(false);
-			}
-
-			switch(textureStage.secondModifierAlpha)   // FIXME: Check if actually used
-			{
-			case TextureStage::MODIFIER_COLOR:
-				break;
-			case TextureStage::MODIFIER_INVCOLOR:
-				{
-					mod2.w = SubSat(Short4(0x1000), arg2->w);
-
-					arg2 = &mod2;
-				}
-				break;
-			case TextureStage::MODIFIER_ALPHA:
-				{
-					// Redudant
-				}
-				break;
-			case TextureStage::MODIFIER_INVALPHA:
-				{
-					mod2.w = SubSat(Short4(0x1000), arg2->w);
-
-					arg2 = &mod2;
-				}
-				break;
-			default:
-				ASSERT(false);
-			}
-
-			switch(textureStage.thirdModifierAlpha)   // FIXME: Check if actually used
-			{
-			case TextureStage::MODIFIER_COLOR:
-				break;
-			case TextureStage::MODIFIER_INVCOLOR:
-				{
-					mod3.w = SubSat(Short4(0x1000), arg3->w);
-
-					arg3 = &mod3;
-				}
-				break;
-			case TextureStage::MODIFIER_ALPHA:
-				{
-					// Redudant
-				}
-				break;
-			case TextureStage::MODIFIER_INVALPHA:
-				{
-					mod3.w = SubSat(Short4(0x1000), arg3->w);
-
-					arg3 = &mod3;
-				}
-				break;
-			default:
-				ASSERT(false);
-			}
-		
-			switch(textureStage.stageOperationAlpha)
-			{
-			case TextureStage::STAGE_DISABLE:
-				break;
-			case TextureStage::STAGE_SELECTARG1:					// Arg1
-				{
-					res.w = arg1->w;
-				}
-				break;
-			case TextureStage::STAGE_SELECTARG2:					// Arg2
-				{
-					res.w = arg2->w;
-				}
-				break;
-			case TextureStage::STAGE_SELECTARG3:					// Arg3
-				{
-					res.w = arg3->w;
-				}
-				break;
-			case TextureStage::STAGE_MODULATE:					// Arg1 * Arg2
-				{
-					res.w = MulHigh(arg1->w, arg2->w) << 4;
-				}
-				break;
-			case TextureStage::STAGE_MODULATE2X:					// Arg1 * Arg2 * 2
-				{
-					res.w = MulHigh(arg1->w, arg2->w) << 5;
-				}
-				break;
-			case TextureStage::STAGE_MODULATE4X:					// Arg1 * Arg2 * 4
-				{
-					res.w = MulHigh(arg1->w, arg2->w) << 6;
-				}
-				break;
-			case TextureStage::STAGE_ADD:						// Arg1 + Arg2
-				{
-					res.w = AddSat(arg1->w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_ADDSIGNED:					// Arg1 + Arg2 - 0.5
-				{
-					res.w = AddSat(arg1->w, arg2->w);
-					res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-				}
-				break;
-			case TextureStage::STAGE_ADDSIGNED2X:					// (Arg1 + Arg2 - 0.5) << 1
-				{
-					res.w = AddSat(arg1->w, arg2->w);
-					res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-					res.w = AddSat(res.w, res.w);
-				}
-				break;
-			case TextureStage::STAGE_SUBTRACT:					// Arg1 - Arg2
-				{
-					res.w = SubSat(arg1->w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_ADDSMOOTH:					// Arg1 + Arg2 - Arg1 * Arg2
-				{
-					Short4 tmp;
-
-					tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
-				}
-				break;
-			case TextureStage::STAGE_MULTIPLYADD:				// Arg3 + Arg1 * Arg2
-				{
-					res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
-				}
-				break;
-			case TextureStage::STAGE_LERP:						// Arg3 * (Arg1 - Arg2) + Arg2
-				{
-					res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_DOT3:
-				break;   // Already computed in color channel
-			case TextureStage::STAGE_BLENDCURRENTALPHA:			// Alpha * (Arg1 - Arg2) + Arg2
-				{
-					res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.current.w) << 4; res.w = AddSat(res.w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_BLENDDIFFUSEALPHA:			// Arg1 * (Alpha) + Arg2 * (1 - Alpha)
-				{
-					res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_BLENDFACTORALPHA:
-				{
-					res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_BLENDTEXTUREALPHA:			// Arg1 * (Alpha) + Arg2 * (1 - Alpha)
-				{
-					res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
-				}
-				break;
-			case TextureStage::STAGE_BLENDTEXTUREALPHAPM:		// Arg1 + Arg2 * (1 - Alpha)
-				{
-					res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
-				}
-				break;
-			case TextureStage::STAGE_PREMODULATE:
-				{
-					res.w = arg1->w;
-				}
-				break;
-			case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
-			case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
-			case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
-			case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
-			case TextureStage::STAGE_BUMPENVMAP:
-			case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
-				break;   // Invalid alpha operations
-			default:
-				ASSERT(false);
-			}
-		}
-
-		// Clamp result to [0, 1]
-
-		switch(textureStage.stageOperation)
-		{
-		case TextureStage::STAGE_DISABLE:
-		case TextureStage::STAGE_SELECTARG1:
-		case TextureStage::STAGE_SELECTARG2:
-		case TextureStage::STAGE_SELECTARG3:
-		case TextureStage::STAGE_MODULATE:
-		case TextureStage::STAGE_MODULATE2X:
-		case TextureStage::STAGE_MODULATE4X:
-		case TextureStage::STAGE_ADD:
-		case TextureStage::STAGE_MULTIPLYADD:
-		case TextureStage::STAGE_LERP:
-		case TextureStage::STAGE_BLENDCURRENTALPHA:
-		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
-		case TextureStage::STAGE_BLENDFACTORALPHA:
-		case TextureStage::STAGE_BLENDTEXTUREALPHA:
-		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
-		case TextureStage::STAGE_DOT3:   // Already clamped
-		case TextureStage::STAGE_PREMODULATE:
-		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
-		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
-		case TextureStage::STAGE_BUMPENVMAP:
-		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
-			if(state.textureStage[stage].cantUnderflow)
-			{
-				break;   // Can't go below zero
-			}
-		case TextureStage::STAGE_ADDSIGNED:
-		case TextureStage::STAGE_ADDSIGNED2X:
-		case TextureStage::STAGE_SUBTRACT:
-		case TextureStage::STAGE_ADDSMOOTH:
-			res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-			res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-			res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.stageOperationAlpha)
-		{
-		case TextureStage::STAGE_DISABLE:
-		case TextureStage::STAGE_SELECTARG1:
-		case TextureStage::STAGE_SELECTARG2:
-		case TextureStage::STAGE_SELECTARG3:
-		case TextureStage::STAGE_MODULATE:
-		case TextureStage::STAGE_MODULATE2X:
-		case TextureStage::STAGE_MODULATE4X:
-		case TextureStage::STAGE_ADD:
-		case TextureStage::STAGE_MULTIPLYADD:
-		case TextureStage::STAGE_LERP:
-		case TextureStage::STAGE_BLENDCURRENTALPHA:
-		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
-		case TextureStage::STAGE_BLENDFACTORALPHA:
-		case TextureStage::STAGE_BLENDTEXTUREALPHA:
-		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
-		case TextureStage::STAGE_DOT3:   // Already clamped
-		case TextureStage::STAGE_PREMODULATE:
-		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
-		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
-		case TextureStage::STAGE_BUMPENVMAP:
-		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
-			if(state.textureStage[stage].cantUnderflow)
-			{
-				break;   // Can't go below zero
-			}
-		case TextureStage::STAGE_ADDSIGNED:
-		case TextureStage::STAGE_ADDSIGNED2X:
-		case TextureStage::STAGE_SUBTRACT:
-		case TextureStage::STAGE_ADDSMOOTH:
-			res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.stageOperation)
-		{
-		case TextureStage::STAGE_DISABLE:
-		case TextureStage::STAGE_SELECTARG1:
-		case TextureStage::STAGE_SELECTARG2:
-		case TextureStage::STAGE_SELECTARG3:
-		case TextureStage::STAGE_MODULATE:
-		case TextureStage::STAGE_SUBTRACT:
-		case TextureStage::STAGE_ADDSMOOTH:
-		case TextureStage::STAGE_LERP:
-		case TextureStage::STAGE_BLENDCURRENTALPHA:
-		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
-		case TextureStage::STAGE_BLENDFACTORALPHA:
-		case TextureStage::STAGE_BLENDTEXTUREALPHA:
-		case TextureStage::STAGE_DOT3:   // Already clamped
-		case TextureStage::STAGE_PREMODULATE:
-		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
-		case TextureStage::STAGE_BUMPENVMAP:
-		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
-			break;   // Can't go above one
-		case TextureStage::STAGE_MODULATE2X:
-		case TextureStage::STAGE_MODULATE4X:
-		case TextureStage::STAGE_ADD:
-		case TextureStage::STAGE_ADDSIGNED:
-		case TextureStage::STAGE_ADDSIGNED2X:
-		case TextureStage::STAGE_MULTIPLYADD:
-		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
-		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
-			res.x = Min(res.x, Short4(0x1000));
-			res.y = Min(res.y, Short4(0x1000));
-			res.z = Min(res.z, Short4(0x1000));
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.stageOperationAlpha)
-		{
-		case TextureStage::STAGE_DISABLE:
-		case TextureStage::STAGE_SELECTARG1:
-		case TextureStage::STAGE_SELECTARG2:
-		case TextureStage::STAGE_SELECTARG3:
-		case TextureStage::STAGE_MODULATE:
-		case TextureStage::STAGE_SUBTRACT:
-		case TextureStage::STAGE_ADDSMOOTH:
-		case TextureStage::STAGE_LERP:
-		case TextureStage::STAGE_BLENDCURRENTALPHA:
-		case TextureStage::STAGE_BLENDDIFFUSEALPHA:
-		case TextureStage::STAGE_BLENDFACTORALPHA:
-		case TextureStage::STAGE_BLENDTEXTUREALPHA:
-		case TextureStage::STAGE_DOT3:   // Already clamped
-		case TextureStage::STAGE_PREMODULATE:
-		case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
-		case TextureStage::STAGE_BUMPENVMAP:
-		case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
-			break;   // Can't go above one
-		case TextureStage::STAGE_MODULATE2X:
-		case TextureStage::STAGE_MODULATE4X:
-		case TextureStage::STAGE_ADD:
-		case TextureStage::STAGE_ADDSIGNED:
-		case TextureStage::STAGE_ADDSIGNED2X:
-		case TextureStage::STAGE_MULTIPLYADD:
-		case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
-		case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
-		case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
-			res.w = Min(res.w, Short4(0x1000));
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		switch(textureStage.destinationArgument)
-		{
-		case TextureStage::DESTINATION_CURRENT:
-			r.current.x = res.x;
-			r.current.y = res.y;
-			r.current.z = res.z;
-			r.current.w = res.w;
-			break;
-		case TextureStage::DESTINATION_TEMP:
-			temp.x = res.x;
-			temp.y = res.y;
-			temp.z = res.z;
-			temp.w = res.w;
-			break;
-		default:
-			ASSERT(false);
-		}
-	}
-
 	void PixelRoutine::alphaTest(Registers &r, Int &aMask, Short4 &alpha)
 	{
 		Short4 cmp;
@@ -1624,103 +602,6 @@
 		cMask[3] &= aMask3;
 	}
 
-	Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Vector4s &current)
-	{
-		if(!state.alphaTestActive())
-		{
-			return true;
-		}
-
-		Int aMask;
-
-		if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
-		{
-			alphaTest(r, aMask, current.w);
-
-			for(unsigned int q = 0; q < state.multiSample; q++)
-			{
-				cMask[q] &= aMask;
-			}
-		}
-		else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
-		{
-			Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
-
-			alphaToCoverage(r, cMask, alpha);
-		}
-		else ASSERT(false);
-
-		Int pass = cMask[0];
-
-		for(unsigned int q = 1; q < state.multiSample; q++)
-		{
-			pass = pass | cMask[q];
-		}
-
-		return pass != 0x0;
-	}
-
-	Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Vector4f &c0)
-	{
-		if(!state.alphaTestActive())
-		{
-			return true;
-		}
-
-		Int aMask;
-
-		if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
-		{
-			Short4 alpha = RoundShort4(c0.w * Float4(0x1000));
-
-			alphaTest(r, aMask, alpha);
-
-			for(unsigned int q = 0; q < state.multiSample; q++)
-			{
-				cMask[q] &= aMask;
-			}
-		}
-		else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
-		{
-			alphaToCoverage(r, cMask, c0.w);
-		}
-		else ASSERT(false);
-
-		Int pass = cMask[0];
-
-		for(unsigned int q = 1; q < state.multiSample; q++)
-		{
-			pass = pass | cMask[q];
-		}
-
-		return pass != 0x0;
-	}
-
-	void PixelRoutine::fogBlend(Registers &r, Vector4s &current, Float4 &f, Float4 &z, Float4 &rhw)
-	{
-		if(!state.fogActive)
-		{
-			return;
-		}
-
-		if(state.pixelFogMode != FOG_NONE)
-		{
-			pixelFog(r, f, z, rhw);
-		}
-		
-		UShort4 fog = convertFixed16(f, true);
-
-		current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
-		current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
-		current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
-
-		UShort4 invFog = UShort4(0xFFFFu) - fog;
-
-		current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[0]))));
-		current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[1]))));
-		current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[2]))));
-	}
-
 	void PixelRoutine::fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw)
 	{
 		if(!state.fogActive)
@@ -1795,18 +676,6 @@
 		}
 	}
 
-	void PixelRoutine::specularPixel(Vector4s &current, Vector4s &specular)
-	{
-		if(!state.specularAdd)
-		{
-			return;
-		}
-
-		current.x = AddSat(current.x, specular.x);
-		current.y = AddSat(current.y, specular.y);
-		current.z = AddSat(current.z, specular.z);
-	}
-
 	void PixelRoutine::writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
 	{
 		if(!state.depthWriteEnable)
@@ -2010,294 +879,6 @@
 		}
 	}
 
-	void PixelRoutine::sampleTexture(Registers &r, Vector4s &c, int coordinates, int stage, bool project)
-	{
-		Float4 u = r.vf[2 + coordinates].x;
-		Float4 v = r.vf[2 + coordinates].y;
-		Float4 w = r.vf[2 + coordinates].z;
-		Float4 q = r.vf[2 + coordinates].w;
-
-		if(perturbate)
-		{
-			u += r.du;
-			v += r.dv;
-
-			perturbate = false;
-		}
-
-		sampleTexture(r, c, stage, u, v, w, q, project);
-	}
-
-	void PixelRoutine::sampleTexture(Registers &r, Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project, bool bias)
-	{
-		Vector4f dsx;
-		Vector4f dsy;
-
-		sampleTexture(r, c, stage, u, v, w, q, dsx, dsy, project, bias, false);
-	}
-
-	void PixelRoutine::sampleTexture(Registers &r, Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
-	{
-		#if PERF_PROFILE
-			Long texTime = Ticks();
-		#endif
-
-		Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap) + stage * sizeof(Texture);
-
-		if(!project)
-		{
-			sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
-		}
-		else
-		{
-			Float4 rq = reciprocal(q);
-
-			Float4 u_q = u * rq;
-			Float4 v_q = v * rq;
-			Float4 w_q = w * rq;
-
-			sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, gradients, lodProvided);
-		}
-
-		#if PERF_PROFILE
-			r.cycles[PERF_TEX] += Ticks() - texTime;
-		#endif
-	}
-
-	void PixelRoutine::sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
-	{
-		if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
-		{	
-			sampleTexture(r, c, sampler.index, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);	
-		}
-		else
-		{
-			Int index = As<Int>(Float(fetchRegisterF(r, sampler).x.x));
-
-			for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
-			{
-				if(shader->usesSampler(i))
-				{
-					If(index == i)
-					{
-						sampleTexture(r, c, i, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
-						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
-					}
-				}
-			}
-		}
-	}
-
-	void PixelRoutine::sampleTexture(Registers &r, Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
-	{
-		#if PERF_PROFILE
-			Long texTime = Ticks();
-		#endif
-
-		Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap) + stage * sizeof(Texture);
-
-		if(!project)
-		{
-			sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
-		}
-		else
-		{
-			Float4 rq = reciprocal(q);
-
-			Float4 u_q = u * rq;
-			Float4 v_q = v * rq;
-			Float4 w_q = w * rq;
-
-			sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, gradients, lodProvided);
-		}
-
-		#if PERF_PROFILE
-			r.cycles[PERF_TEX] += Ticks() - texTime;
-		#endif
-	}
-
-	void PixelRoutine::clampColor(Vector4f oC[4])
-	{
-		for(int index = 0; index < 4; index++)
-		{
-			if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
-			{
-				continue;
-			}
-
-			switch(state.targetFormat[index])
-			{
-			case FORMAT_NULL:
-				break;
-			case FORMAT_R5G6B5:
-			case FORMAT_A8R8G8B8:
-			case FORMAT_A8B8G8R8:
-			case FORMAT_X8R8G8B8:
-			case FORMAT_X8B8G8R8:
-			case FORMAT_A8:
-			case FORMAT_G16R16:
-			case FORMAT_A16B16G16R16:
-				oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
-				oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
-				oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
-				oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
-				break;
-			case FORMAT_R32F:
-			case FORMAT_G32R32F:
-			case FORMAT_A32B32G32R32F:
-				break;
-			default:
-				ASSERT(false);
-			}
-		}
-	}
-
-	void PixelRoutine::rasterOperation(Vector4s &current, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
-	{
-		if(!state.colorWriteActive(0))
-		{
-			return;
-		}
-
-		Vector4f oC;
-
-		switch(state.targetFormat[0])
-		{
-		case FORMAT_R5G6B5:
-		case FORMAT_X8R8G8B8:
-		case FORMAT_X8B8G8R8:
-		case FORMAT_A8R8G8B8:
-		case FORMAT_A8B8G8R8:
-		case FORMAT_A8:
-		case FORMAT_G16R16:
-		case FORMAT_A16B16G16R16:
-			if(!postBlendSRGB && state.writeSRGB)
-			{
-				linearToSRGB12_16(r, current);
-			}
-			else
-			{
-				current.x <<= 4;
-				current.y <<= 4;
-				current.z <<= 4;
-				current.w <<= 4;
-			}
-
-			if(state.targetFormat[0] == FORMAT_R5G6B5)
-			{
-				current.x &= Short4(0xF800u);
-				current.y &= Short4(0xFC00u);
-				current.z &= Short4(0xF800u);
-			}
-
-			fogBlend(r, current, fog, r.z[0], r.rhw);
-
-			for(unsigned int q = 0; q < state.multiSample; q++)
-			{
-				Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0]));
-				Vector4s color = current;
-
-				if(state.multiSampleMask & (1 << q))
-				{
-					alphaBlend(r, 0, buffer, color, x);
-					logicOperation(r, 0, buffer, color, x);
-					writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
-				}
-			}
-			break;
-		case FORMAT_R32F:
-		case FORMAT_G32R32F:
-		case FORMAT_A32B32G32R32F:
-			convertSigned12(oC, current);
-			fogBlend(r, oC, fog, r.z[0], r.rhw);
-			
-			for(unsigned int q = 0; q < state.multiSample; q++)
-			{
-				Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0]));
-				Vector4f color = oC;
-
-				if(state.multiSampleMask & (1 << q))
-				{
-					alphaBlend(r, 0, buffer, color, x);
-					writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
-				}
-			}
-			break;
-		default:
-			ASSERT(false);
-		}
-	}
-
-	void PixelRoutine::rasterOperation(Vector4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
-	{
-		for(int index = 0; index < 4; index++)
-		{
-			if(!state.colorWriteActive(index))
-			{
-				continue;
-			}
-
-			if(!postBlendSRGB && state.writeSRGB)
-			{
-				oC[index].x = linearToSRGB(oC[index].x);
-				oC[index].y = linearToSRGB(oC[index].y);
-				oC[index].z = linearToSRGB(oC[index].z);
-			}
-
-			if(index == 0)
-			{
-				fogBlend(r, oC[index], fog, r.z[0], r.rhw);
-			}
-
-			switch(state.targetFormat[index])
-			{
-			case FORMAT_R5G6B5:
-			case FORMAT_X8R8G8B8:
-			case FORMAT_X8B8G8R8:
-			case FORMAT_A8R8G8B8:
-			case FORMAT_A8B8G8R8:
-			case FORMAT_A8:
-			case FORMAT_G16R16:
-			case FORMAT_A16B16G16R16:
-				for(unsigned int q = 0; q < state.multiSample; q++)
-				{
-					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index]));
-					Vector4s color;
-
-					color.x = convertFixed16(oC[index].x, false);
-					color.y = convertFixed16(oC[index].y, false);
-					color.z = convertFixed16(oC[index].z, false);
-					color.w = convertFixed16(oC[index].w, false);
-
-					if(state.multiSampleMask & (1 << q))
-					{
-						alphaBlend(r, index, buffer, color, x);
-						logicOperation(r, index, buffer, color, x);
-						writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
-					}
-				}
-				break;
-			case FORMAT_R32F:
-			case FORMAT_G32R32F:
-			case FORMAT_A32B32G32R32F:
-				for(unsigned int q = 0; q < state.multiSample; q++)
-				{
-					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index]));
-					Vector4f color = oC[index];
-
-					if(state.multiSampleMask & (1 << q))
-					{
-						alphaBlend(r, index, buffer, color, x);
-						writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
-					}
-				}
-				break;
-			default:
-				ASSERT(false);
-			}
-		}
-	}
-
 	void PixelRoutine::blendFactor(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
 	{
 		switch(blendFactorActive)
@@ -3741,629 +2322,11 @@
 		}
 	}
 
-	void PixelRoutine::ps_1_x(Registers &r, Int cMask[4])
-	{
-		int pad = 0;        // Count number of texm3x3pad instructions
-		Vector4s dPairing;   // Destination for first pairing instruction
-
-		for(size_t i = 0; i < shader->getLength(); i++)
-		{
-			const Shader::Instruction *instruction = shader->getInstruction(i);
-			Shader::Opcode opcode = instruction->opcode;
-
-		//	#ifndef NDEBUG   // FIXME: Centralize debug output control
-		//		shader->printInstruction(i, "debug.txt");
-		//	#endif
-
-			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
-			{
-				continue;
-			}
-
-			const Dst &dst = instruction->dst;
-			const Src &src0 = instruction->src[0];
-			const Src &src1 = instruction->src[1];
-			const Src &src2 = instruction->src[2];
-
-			unsigned short version = shader->getVersion();
-			bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue;   // First instruction of pair
-			bool coissue = instruction->coissue;                                                              // Second instruction of pair
-
-			Vector4s d;
-			Vector4s s0;
-			Vector4s s1;
-			Vector4s s2;
-
-			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterS(r, src0);
-			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterS(r, src1);
-			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterS(r, src2);
-
-			Float4 u = version < 0x0104 ? r.vf[2 + dst.index].x : r.vf[2 + src0.index].x;
-			Float4 v = version < 0x0104 ? r.vf[2 + dst.index].y : r.vf[2 + src0.index].y;
-			Float4 s = version < 0x0104 ? r.vf[2 + dst.index].z : r.vf[2 + src0.index].z;
-			Float4 t = version < 0x0104 ? r.vf[2 + dst.index].w : r.vf[2 + src0.index].w;
-
-			switch(opcode)
-			{
-			case Shader::OPCODE_PS_1_0:															break;
-			case Shader::OPCODE_PS_1_1:															break;
-			case Shader::OPCODE_PS_1_2:															break;
-			case Shader::OPCODE_PS_1_3:															break;
-			case Shader::OPCODE_PS_1_4:															break;
-
-			case Shader::OPCODE_DEF:															break;
-
-			case Shader::OPCODE_NOP:															break;
-			case Shader::OPCODE_MOV:			MOV(d, s0);										break;
-			case Shader::OPCODE_ADD:			ADD(d, s0, s1);									break;
-			case Shader::OPCODE_SUB:			SUB(d, s0, s1);									break;
-			case Shader::OPCODE_MAD:			MAD(d, s0, s1, s2);								break;
-			case Shader::OPCODE_MUL:			MUL(d, s0, s1);									break;
-			case Shader::OPCODE_DP3:			DP3(d, s0, s1);									break;
-			case Shader::OPCODE_DP4:			DP4(d, s0, s1);									break;
-			case Shader::OPCODE_LRP:			LRP(d, s0, s1, s2);								break;
-			case Shader::OPCODE_TEXCOORD:
-				if(version < 0x0104)
-				{
-					TEXCOORD(d, u, v, s, dst.index);
-				}
-				else
-				{
-					if((src0.swizzle & 0x30) == 0x20)   // .xyz
-					{
-						TEXCRD(d, u, v, s, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
-					}
-					else   // .xyw
-					{
-						TEXCRD(d, u, v, t, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
-					}
-				}
-				break;
-			case Shader::OPCODE_TEXKILL:
-				if(version < 0x0104)
-				{
-					TEXKILL(cMask, u, v, s);
-				}
-				else if(version == 0x0104)
-				{
-					if(dst.type == Shader::PARAMETER_TEXTURE)
-					{
-						TEXKILL(cMask, u, v, s);
-					}
-					else
-					{
-						TEXKILL(cMask, r.rs[dst.index]);
-					}
-				}
-				else ASSERT(false);
-				break;
-			case Shader::OPCODE_TEX:
-				if(version < 0x0104)
-				{
-					TEX(r, d, u, v, s, dst.index, false);
-				}
-				else if(version == 0x0104)
-				{
-					if(src0.type == Shader::PARAMETER_TEXTURE)
-					{
-						if((src0.swizzle & 0x30) == 0x20)   // .xyz
-						{
-							TEX(r, d, u, v, s, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
-						}
-						else   // .xyw
-						{
-							TEX(r, d, u, v, t, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
-						}
-					}
-					else
-					{
-						TEXLD(r, d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
-					}
-				}
-				else ASSERT(false);
-				break;
-			case Shader::OPCODE_TEXBEM:			TEXBEM(r, d, s0, u, v, s, dst.index);	break;
-			case Shader::OPCODE_TEXBEML:		TEXBEML(r, d, s0, u, v, s, dst.index);	break;
-			case Shader::OPCODE_TEXREG2AR:		TEXREG2AR(r, d, s0, dst.index);					break;
-			case Shader::OPCODE_TEXREG2GB:		TEXREG2GB(r, d, s0, dst.index);					break;
-			case Shader::OPCODE_TEXM3X2PAD:		TEXM3X2PAD(r, u, v, s, s0, 0, src0.modifier == Shader::MODIFIER_SIGN);	break;
-			case Shader::OPCODE_TEXM3X2TEX:		TEXM3X2TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN);	break;
-			case Shader::OPCODE_TEXM3X3PAD:		TEXM3X3PAD(r, u, v, s, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN);	break;
-			case Shader::OPCODE_TEXM3X3TEX:		TEXM3X3TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN);	break;
-			case Shader::OPCODE_TEXM3X3SPEC:	TEXM3X3SPEC(r, d, u, v, s, dst.index, s0, s1);		break;
-			case Shader::OPCODE_TEXM3X3VSPEC:	TEXM3X3VSPEC(r, d, u, v, s, dst.index, s0);		break;
-			case Shader::OPCODE_CND:			CND(d, s0, s1, s2);								break;
-			case Shader::OPCODE_TEXREG2RGB:		TEXREG2RGB(r, d, s0, dst.index);				break;
-			case Shader::OPCODE_TEXDP3TEX:		TEXDP3TEX(r, d, u, v, s, dst.index, s0);	break;
-			case Shader::OPCODE_TEXM3X2DEPTH:	TEXM3X2DEPTH(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN);	break;
-			case Shader::OPCODE_TEXDP3:			TEXDP3(r, d, u, v, s, s0);				break;
-			case Shader::OPCODE_TEXM3X3:		TEXM3X3(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN); 	break;
-			case Shader::OPCODE_TEXDEPTH:		TEXDEPTH(r);									break;
-			case Shader::OPCODE_CMP0:			CMP(d, s0, s1, s2);								break;
-			case Shader::OPCODE_BEM:			BEM(r, d, s0, s1, dst.index);					break;
-			case Shader::OPCODE_PHASE:															break;
-			case Shader::OPCODE_END:															break;
-			default:
-				ASSERT(false);
-			}
-
-			if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
-			{
-				if(dst.shift > 0)
-				{
-					if(dst.mask & 0x1) {d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x);}
-					if(dst.mask & 0x2) {d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y);}
-					if(dst.mask & 0x4) {d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z);}
-					if(dst.mask & 0x8) {d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w);}
-				}
-				else if(dst.shift < 0)
-				{
-					if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
-					if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
-					if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
-					if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
-				}
-
-				if(dst.saturate)
-				{
-					if(dst.mask & 0x1) {d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
-					if(dst.mask & 0x2) {d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
-					if(dst.mask & 0x4) {d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
-					if(dst.mask & 0x8) {d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
-				}
-
-				if(pairing)
-				{
-					if(dst.mask & 0x1) dPairing.x = d.x;
-					if(dst.mask & 0x2) dPairing.y = d.y;
-					if(dst.mask & 0x4) dPairing.z = d.z;
-					if(dst.mask & 0x8) dPairing.w = d.w;
-				}
-			
-				if(coissue)
-				{
-					const Dst &dst = shader->getInstruction(i - 1)->dst;
-
-					writeDestination(r, dPairing, dst);
-				}
-			
-				if(!pairing)
-				{
-					writeDestination(r, d, dst);
-				}
-			}
-		}
-	}
-
-	void PixelRoutine::ps_2_x(Registers &r, Int cMask[4])
-	{
-		r.enableIndex = 0;
-		r.stackIndex = 0;
-
-		if(shader->containsLeaveInstruction())
-		{
-			r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-		}
-
-		bool out[4][4] = {false};
-
-		// Create all call site return blocks up front
-		for(size_t i = 0; i < shader->getLength(); i++)
-		{
-			const Shader::Instruction *instruction = shader->getInstruction(i);
-			Shader::Opcode opcode = instruction->opcode;
-
-			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
-			{
-				const Dst &dst = instruction->dst;
-
-				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
-				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
-			}
-		}
-		
-		for(size_t i = 0; i < shader->getLength(); i++)
-		{
-			const Shader::Instruction *instruction = shader->getInstruction(i);
-			Shader::Opcode opcode = instruction->opcode;
-
-			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
-			{
-				continue;
-			}
-
-			const Dst &dst = instruction->dst;
-			const Src &src0 = instruction->src[0];
-			const Src &src1 = instruction->src[1];
-			const Src &src2 = instruction->src[2];
-			const Src &src3 = instruction->src[3];
-
-			bool predicate = instruction->predicate;
-			Control control = instruction->control;
-			bool pp = dst.partialPrecision;
-			bool project = instruction->project;
-			bool bias = instruction->bias;
-
-			Vector4f d;
-			Vector4f s0;
-			Vector4f s1;
-			Vector4f s2;
-			Vector4f s3;
-
-			if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
-			{
-				if(dst.type == Shader::PARAMETER_TEXTURE)
-				{
-					d.x = r.vf[2 + dst.index].x;
-					d.y = r.vf[2 + dst.index].y;
-					d.z = r.vf[2 + dst.index].z;
-					d.w = r.vf[2 + dst.index].w;
-				}
-				else
-				{
-					d = r.rf[dst.index];
-				}
-			}
-
-			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
-			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
-			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
-			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
-
-			switch(opcode)
-			{
-			case Shader::OPCODE_PS_2_0:														break;
-			case Shader::OPCODE_PS_2_x:														break;
-			case Shader::OPCODE_PS_3_0:														break;
-			case Shader::OPCODE_DEF:														break;
-			case Shader::OPCODE_DCL:														break;
-			case Shader::OPCODE_NOP:														break;
-			case Shader::OPCODE_MOV:		mov(d, s0);										break;
-			case Shader::OPCODE_F2B:		f2b(d, s0);										break;
-			case Shader::OPCODE_B2F:		b2f(d, s0);										break;
-			case Shader::OPCODE_ADD:		add(d, s0, s1);									break;
-			case Shader::OPCODE_SUB:		sub(d, s0, s1);									break;
-			case Shader::OPCODE_MUL:		mul(d, s0, s1);									break;
-			case Shader::OPCODE_MAD:		mad(d, s0, s1, s2);								break;
-			case Shader::OPCODE_DP1:		dp1(d, s0, s1);									break;
-			case Shader::OPCODE_DP2:		dp2(d, s0, s1);									break;
-			case Shader::OPCODE_DP2ADD:		dp2add(d, s0, s1, s2);							break;
-			case Shader::OPCODE_DP3:		dp3(d, s0, s1);									break;
-			case Shader::OPCODE_DP4:		dp4(d, s0, s1);									break;
-			case Shader::OPCODE_CMP0:		cmp0(d, s0, s1, s2);							break;
-			case Shader::OPCODE_ICMP:		icmp(d, s0, s1, control);						break;
-			case Shader::OPCODE_SELECT:		select(d, s0, s1, s2);							break;
-			case Shader::OPCODE_EXTRACT:	extract(d.x, s0, s1.x);							break;
-			case Shader::OPCODE_INSERT:		insert(d, s0, s1.x, s2.x);						break;
-			case Shader::OPCODE_FRC:		frc(d, s0);										break;
-			case Shader::OPCODE_TRUNC:      trunc(d, s0);                                   break;
-			case Shader::OPCODE_FLOOR:      floor(d, s0);                                   break;
-			case Shader::OPCODE_ROUND:		round(d, s0);                                   break;
-			case Shader::OPCODE_ROUNDEVEN:	roundEven(d, s0);                               break;
-			case Shader::OPCODE_CEIL:       ceil(d, s0);                                    break;
-			case Shader::OPCODE_EXP2X:		exp2x(d, s0, pp);								break;
-			case Shader::OPCODE_EXP2:		exp2(d, s0, pp);								break;
-			case Shader::OPCODE_LOG2X:		log2x(d, s0, pp);								break;
-			case Shader::OPCODE_LOG2:		log2(d, s0, pp);								break;
-			case Shader::OPCODE_EXP:		exp(d, s0, pp);									break;
-			case Shader::OPCODE_LOG:		log(d, s0, pp);									break;
-			case Shader::OPCODE_RCPX:		rcpx(d, s0, pp);								break;
-			case Shader::OPCODE_DIV:		div(d, s0, s1);									break;
-			case Shader::OPCODE_MOD:		mod(d, s0, s1);									break;
-			case Shader::OPCODE_RSQX:		rsqx(d, s0, pp);								break;
-			case Shader::OPCODE_SQRT:		sqrt(d, s0, pp);								break;
-			case Shader::OPCODE_RSQ:		rsq(d, s0, pp);									break;
-			case Shader::OPCODE_LEN2:		len2(d.x, s0, pp);								break;
-			case Shader::OPCODE_LEN3:		len3(d.x, s0, pp);								break;
-			case Shader::OPCODE_LEN4:		len4(d.x, s0, pp);								break;
-			case Shader::OPCODE_DIST1:		dist1(d.x, s0, s1, pp);							break;
-			case Shader::OPCODE_DIST2:		dist2(d.x, s0, s1, pp);							break;
-			case Shader::OPCODE_DIST3:		dist3(d.x, s0, s1, pp);							break;
-			case Shader::OPCODE_DIST4:		dist4(d.x, s0, s1, pp);							break;
-			case Shader::OPCODE_MIN:		min(d, s0, s1);									break;
-			case Shader::OPCODE_MAX:		max(d, s0, s1);									break;
-			case Shader::OPCODE_LRP:		lrp(d, s0, s1, s2);								break;
-			case Shader::OPCODE_STEP:		step(d, s0, s1);								break;
-			case Shader::OPCODE_SMOOTH:		smooth(d, s0, s1, s2);							break;
-			case Shader::OPCODE_POWX:		powx(d, s0, s1, pp);							break;
-			case Shader::OPCODE_POW:		pow(d, s0, s1, pp);								break;
-			case Shader::OPCODE_SGN:		sgn(d, s0);										break;
-			case Shader::OPCODE_CRS:		crs(d, s0, s1);									break;
-			case Shader::OPCODE_FORWARD1:	forward1(d, s0, s1, s2);						break;
-			case Shader::OPCODE_FORWARD2:	forward2(d, s0, s1, s2);						break;
-			case Shader::OPCODE_FORWARD3:	forward3(d, s0, s1, s2);						break;
-			case Shader::OPCODE_FORWARD4:	forward4(d, s0, s1, s2);						break;
-			case Shader::OPCODE_REFLECT1:	reflect1(d, s0, s1);							break;
-			case Shader::OPCODE_REFLECT2:	reflect2(d, s0, s1);							break;
-			case Shader::OPCODE_REFLECT3:	reflect3(d, s0, s1);							break;
-			case Shader::OPCODE_REFLECT4:	reflect4(d, s0, s1);							break;
-			case Shader::OPCODE_REFRACT1:	refract1(d, s0, s1, s2.x);						break;
-			case Shader::OPCODE_REFRACT2:	refract2(d, s0, s1, s2.x);						break;
-			case Shader::OPCODE_REFRACT3:	refract3(d, s0, s1, s2.x);						break;
-			case Shader::OPCODE_REFRACT4:	refract4(d, s0, s1, s2.x);						break;
-			case Shader::OPCODE_NRM2:		nrm2(d, s0, pp);								break;
-			case Shader::OPCODE_NRM3:		nrm3(d, s0, pp);								break;
-			case Shader::OPCODE_NRM4:		nrm4(d, s0, pp);								break;
-			case Shader::OPCODE_ABS:		abs(d, s0);										break;
-			case Shader::OPCODE_SINCOS:		sincos(d, s0, pp);								break;
-			case Shader::OPCODE_COS:		cos(d, s0, pp);									break;
-			case Shader::OPCODE_SIN:		sin(d, s0, pp);									break;
-			case Shader::OPCODE_TAN:		tan(d, s0, pp);									break;
-			case Shader::OPCODE_ACOS:		acos(d, s0, pp);								break;
-			case Shader::OPCODE_ASIN:		asin(d, s0, pp);								break;
-			case Shader::OPCODE_ATAN:		atan(d, s0, pp);								break;
-			case Shader::OPCODE_ATAN2:		atan2(d, s0, s1, pp);							break;
-			case Shader::OPCODE_COSH:		cosh(d, s0, pp);								break;
-			case Shader::OPCODE_SINH:		sinh(d, s0, pp);								break;
-			case Shader::OPCODE_TANH:		tanh(d, s0, pp);								break;
-			case Shader::OPCODE_ACOSH:		acosh(d, s0, pp);								break;
-			case Shader::OPCODE_ASINH:		asinh(d, s0, pp);								break;
-			case Shader::OPCODE_ATANH:		atanh(d, s0, pp);								break;
-			case Shader::OPCODE_M4X4:		M4X4(r, d, s0, src1);							break;
-			case Shader::OPCODE_M4X3:		M4X3(r, d, s0, src1);							break;
-			case Shader::OPCODE_M3X4:		M3X4(r, d, s0, src1);							break;
-			case Shader::OPCODE_M3X3:		M3X3(r, d, s0, src1);							break;
-			case Shader::OPCODE_M3X2:		M3X2(r, d, s0, src1);							break;
-			case Shader::OPCODE_TEX:		TEXLD(r, d, s0, src1, project, bias);			break;
-			case Shader::OPCODE_TEXLDD:		TEXLDD(r, d, s0, src1, s2, s3, project, bias);	break;
-			case Shader::OPCODE_TEXLDL:		TEXLDL(r, d, s0, src1, project, bias);			break;
-			case Shader::OPCODE_TEXKILL:	TEXKILL(cMask, d, dst.mask);					break;
-			case Shader::OPCODE_DISCARD:	DISCARD(r, cMask, instruction);					break;
-			case Shader::OPCODE_DFDX:		DFDX(d, s0);									break;
-			case Shader::OPCODE_DFDY:		DFDY(d, s0);									break;
-			case Shader::OPCODE_FWIDTH:		FWIDTH(d, s0);									break;
-			case Shader::OPCODE_BREAK:		BREAK(r);										break;
-			case Shader::OPCODE_BREAKC:		BREAKC(r, s0, s1, control);						break;
-			case Shader::OPCODE_BREAKP:		BREAKP(r, src0);								break;
-			case Shader::OPCODE_CONTINUE:	CONTINUE(r);									break;
-			case Shader::OPCODE_TEST:		TEST();											break;
-			case Shader::OPCODE_CALL:		CALL(r, dst.label, dst.callSite);               break;
-			case Shader::OPCODE_CALLNZ:		CALLNZ(r, dst.label, dst.callSite, src0);       break;
-			case Shader::OPCODE_ELSE:		ELSE(r);										break;
-			case Shader::OPCODE_ENDIF:		ENDIF(r);										break;
-			case Shader::OPCODE_ENDLOOP:	ENDLOOP(r);										break;
-			case Shader::OPCODE_ENDREP:		ENDREP(r);										break;
-			case Shader::OPCODE_ENDWHILE:	ENDWHILE(r);	     							break;
-			case Shader::OPCODE_IF:			IF(r, src0);									break;
-			case Shader::OPCODE_IFC:		IFC(r, s0, s1, control);						break;
-			case Shader::OPCODE_LABEL:		LABEL(dst.index);								break;
-			case Shader::OPCODE_LOOP:		LOOP(r, src1);									break;
-			case Shader::OPCODE_REP:		REP(r, src0);									break;
-			case Shader::OPCODE_WHILE:		WHILE(r, src0);									break;
-			case Shader::OPCODE_RET:		RET(r);											break;
-			case Shader::OPCODE_LEAVE:		LEAVE(r);										break;
-			case Shader::OPCODE_CMP:		cmp(d, s0, s1, control);						break;
-			case Shader::OPCODE_ALL:		all(d.x, s0);									break;
-			case Shader::OPCODE_ANY:		any(d.x, s0);									break;
-			case Shader::OPCODE_NOT:		not(d, s0);										break;
-			case Shader::OPCODE_OR:			or(d.x, s0.x, s1.x);							break;
-			case Shader::OPCODE_XOR:		xor(d.x, s0.x, s1.x);							break;
-			case Shader::OPCODE_AND:		and(d.x, s0.x, s1.x);							break;
-			case Shader::OPCODE_END:														break;
-			default:
-				ASSERT(false);
-			}
-
-			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
-			{
-				if(dst.integer)
-				{
-					switch(opcode)
-					{
-					case Shader::OPCODE_DIV:
-						if(dst.x) d.x = Trunc(d.x);
-						if(dst.y) d.y = Trunc(d.y);
-						if(dst.z) d.z = Trunc(d.z);
-						if(dst.w) d.w = Trunc(d.w);
-						break;
-					default:
-						break;   // No truncation to integer required when arguments are integer
-					}
-				}
-
-				if(dst.saturate)
-				{
-					if(dst.x) d.x = Max(d.x, Float4(0.0f));
-					if(dst.y) d.y = Max(d.y, Float4(0.0f));
-					if(dst.z) d.z = Max(d.z, Float4(0.0f));
-					if(dst.w) d.w = Max(d.w, Float4(0.0f));
-
-					if(dst.x) d.x = Min(d.x, Float4(1.0f));
-					if(dst.y) d.y = Min(d.y, Float4(1.0f));
-					if(dst.z) d.z = Min(d.z, Float4(1.0f));
-					if(dst.w) d.w = Min(d.w, Float4(1.0f));
-				}
-
-				if(instruction->isPredicated())
-				{
-					Vector4f pDst;   // FIXME: Rename
-
-					switch(dst.type)
-					{
-					case Shader::PARAMETER_TEMP:
-						if(dst.rel.type == Shader::PARAMETER_VOID)
-						{
-							if(dst.x) pDst.x = r.rf[dst.index].x;
-							if(dst.y) pDst.y = r.rf[dst.index].y;
-							if(dst.z) pDst.z = r.rf[dst.index].z;
-							if(dst.w) pDst.w = r.rf[dst.index].w;
-						}
-						else
-						{
-							Int a = relativeAddress(r, dst);
-
-							if(dst.x) pDst.x = r.rf[dst.index + a].x;
-							if(dst.y) pDst.y = r.rf[dst.index + a].y;
-							if(dst.z) pDst.z = r.rf[dst.index + a].z;
-							if(dst.w) pDst.w = r.rf[dst.index + a].w;
-						}
-						break;
-					case Shader::PARAMETER_COLOROUT:
-						ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
-						if(dst.x) pDst.x = r.oC[dst.index].x;
-						if(dst.y) pDst.y = r.oC[dst.index].y;
-						if(dst.z) pDst.z = r.oC[dst.index].z;
-						if(dst.w) pDst.w = r.oC[dst.index].w;
-						break;
-					case Shader::PARAMETER_PREDICATE:
-						if(dst.x) pDst.x = r.p0.x;
-						if(dst.y) pDst.y = r.p0.y;
-						if(dst.z) pDst.z = r.p0.z;
-						if(dst.w) pDst.w = r.p0.w;
-						break;
-					case Shader::PARAMETER_DEPTHOUT:
-						pDst.x = r.oDepth;
-						break;
-					default:
-						ASSERT(false);
-					}
-				
-					Int4 enable = enableMask(r, instruction);
-
-					Int4 xEnable = enable;
-					Int4 yEnable = enable;
-					Int4 zEnable = enable;
-					Int4 wEnable = enable;
-
-					if(predicate)
-					{
-						unsigned char pSwizzle = instruction->predicateSwizzle;
-
-						Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
-						Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
-						Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
-						Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
-
-						if(!instruction->predicateNot)
-						{
-							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
-							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
-							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
-							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
-						}
-						else
-						{
-							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
-							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
-							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
-							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
-						}
-					}
-
-					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
-					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
-					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
-					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
-
-					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
-					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
-					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
-					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
-				}
-
-				switch(dst.type)
-				{
-				case Shader::PARAMETER_TEMP:
-					if(dst.rel.type == Shader::PARAMETER_VOID)
-					{
-						if(dst.x) r.rf[dst.index].x = d.x;
-						if(dst.y) r.rf[dst.index].y = d.y;
-						if(dst.z) r.rf[dst.index].z = d.z;
-						if(dst.w) r.rf[dst.index].w = d.w;
-					}
-					else
-					{
-						Int a = relativeAddress(r, dst);
-
-						if(dst.x) r.rf[dst.index + a].x = d.x;
-						if(dst.y) r.rf[dst.index + a].y = d.y;
-						if(dst.z) r.rf[dst.index + a].z = d.z;
-						if(dst.w) r.rf[dst.index + a].w = d.w;
-					}
-					break;
-				case Shader::PARAMETER_COLOROUT:
-					ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
-					if(dst.x) {r.oC[dst.index].x = d.x; out[dst.index][0] = true;}
-					if(dst.y) {r.oC[dst.index].y = d.y; out[dst.index][1] = true;}
-					if(dst.z) {r.oC[dst.index].z = d.z; out[dst.index][2] = true;}
-					if(dst.w) {r.oC[dst.index].w = d.w; out[dst.index][3] = true;}
-					break;
-				case Shader::PARAMETER_PREDICATE:
-					if(dst.x) r.p0.x = d.x;
-					if(dst.y) r.p0.y = d.y;
-					if(dst.z) r.p0.z = d.z;
-					if(dst.w) r.p0.w = d.w;
-					break;
-				case Shader::PARAMETER_DEPTHOUT:
-					r.oDepth = d.x;
-					break;
-				default:
-					ASSERT(false);
-				}
-			}
-		}
-
-		if(currentLabel != -1)
-		{
-			Nucleus::setInsertBlock(returnBlock);
-		}
-
-		for(int i = 0; i < 4; i++)
-		{
-			if(state.targetFormat[i] != FORMAT_NULL)
-			{
-				if(!out[i][0]) r.oC[i].x = Float4(0.0f);
-				if(!out[i][1]) r.oC[i].y = Float4(0.0f);
-				if(!out[i][2]) r.oC[i].z = Float4(0.0f);
-				if(!out[i][3]) r.oC[i].w = Float4(0.0f);
-			}
-		}
-	}
-
-	Short4 PixelRoutine::convertFixed12(RValue<Float4> cf)
-	{
-		return RoundShort4(cf * Float4(0x1000));
-	}
-
-	void PixelRoutine::convertFixed12(Vector4s &cs, Vector4f &cf)
-	{
-		cs.x = convertFixed12(cf.x);
-		cs.y = convertFixed12(cf.y);
-		cs.z = convertFixed12(cf.z);
-		cs.w = convertFixed12(cf.w);
-	}
-
 	UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
 	{
 		return UShort4(cf * Float4(0xFFFF), saturate);
 	}
 
-	void PixelRoutine::convertFixed16(Vector4s &cs, Vector4f &cf, bool saturate)
-	{
-		cs.x = convertFixed16(cf.x, saturate);
-		cs.y = convertFixed16(cf.y, saturate);
-		cs.z = convertFixed16(cf.z, saturate);
-		cs.w = convertFixed16(cf.w, saturate);
-	}
-
-	Float4 PixelRoutine::convertSigned12(Short4 &cs)
-	{
-		return Float4(cs) * Float4(1.0f / 0x0FFE);
-	}
-
-	void PixelRoutine::convertSigned12(Vector4f &cf, Vector4s &cs)
-	{
-		cf.x = convertSigned12(cs.x);
-		cf.y = convertSigned12(cs.y);
-		cf.z = convertSigned12(cs.z);
-		cf.w = convertSigned12(cs.w);
-	}
-
-	Float4 PixelRoutine::convertUnsigned16(UShort4 cs)
-	{
-		return Float4(cs) * Float4(1.0f / 0xFFFF);
-	}
-
 	void PixelRoutine::sRGBtoLinear16_12_16(Registers &r, Vector4s &c)
 	{
 		c.x = As<UShort4>(c.x) >> 4;
@@ -4422,14 +2385,6 @@
 		c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
 	}
 
-	Float4 PixelRoutine::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
-	{
-		Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
-		Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
-
-		return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
-	}
-
 	Float4 PixelRoutine::sRGBtoLinear(const Float4 &x)   // Approximates x^2.2
 	{
 		Float4 linear = x * x;
@@ -4438,1592 +2393,8 @@
 		return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
 	}
 
-	void PixelRoutine::MOV(Vector4s &dst, Vector4s &src0)
-	{
-		dst.x = src0.x;
-		dst.y = src0.y;
-		dst.z = src0.z;
-		dst.w = src0.w;
-	}
-
-	void PixelRoutine::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
-	{
-		dst.x = AddSat(src0.x, src1.x);
-		dst.y = AddSat(src0.y, src1.y);
-		dst.z = AddSat(src0.z, src1.z);
-		dst.w = AddSat(src0.w, src1.w);
-	}
-
-	void PixelRoutine::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
-	{
-		dst.x = SubSat(src0.x, src1.x);
-		dst.y = SubSat(src0.y, src1.y);
-		dst.z = SubSat(src0.z, src1.z);
-		dst.w = SubSat(src0.w, src1.w);
-	}
-
-	void PixelRoutine::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
-	{
-		// FIXME: Long fixed-point multiply fixup
-		{dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);}
-		{dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);}
-		{dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z);}
-		{dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);}
-	}
-
-	void PixelRoutine::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
-	{
-		// FIXME: Long fixed-point multiply fixup
-		{dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x);}
-		{dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);}
-		{dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z);}
-		{dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w);}
-	}
-
-	void PixelRoutine::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
-	{
-		Short4 t0;
-		Short4 t1;
-
-		// FIXME: Long fixed-point multiply fixup
-		t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); 
-		t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 
-		t0 = AddSat(t0, t1);
-		t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 
-		t0 = AddSat(t0, t1);
-
-		dst.x = t0;
-		dst.y = t0;
-		dst.z = t0;
-		dst.w = t0;
-	}
-
-	void PixelRoutine::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
-	{
-		Short4 t0;
-		Short4 t1;
-
-		// FIXME: Long fixed-point multiply fixup
-		t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); 
-		t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 
-		t0 = AddSat(t0, t1);
-		t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 
-		t0 = AddSat(t0, t1);
-		t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 
-		t0 = AddSat(t0, t1);
-
-		dst.x = t0;
-		dst.y = t0;
-		dst.z = t0;
-		dst.w = t0;
-	}
-
-	void PixelRoutine::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
-	{
-		// FIXME: Long fixed-point multiply fixup
-		{dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);}
-		{dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);}
-		{dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z);}
-		{dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);}
-	}
-
-	void PixelRoutine::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
-	{
-		Float4 uw;
-		Float4 vw;
-		Float4 sw;
-
-		if(state.interpolant[2 + coordinate].component & 0x01)
-		{
-			uw = Max(u, Float4(0.0f));
-			uw = Min(uw, Float4(1.0f));
-			dst.x = convertFixed12(uw);
-		}
-		else
-		{
-			dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-		}
-
-		if(state.interpolant[2 + coordinate].component & 0x02)
-		{
-			vw = Max(v, Float4(0.0f));
-			vw = Min(vw, Float4(1.0f));
-			dst.y = convertFixed12(vw);
-		}
-		else
-		{
-			dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-		}
-
-		if(state.interpolant[2 + coordinate].component & 0x04)
-		{
-			sw = Max(s, Float4(0.0f));
-			sw = Min(sw, Float4(1.0f));
-			dst.z = convertFixed12(sw);
-		}
-		else
-		{
-			dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-		}
-
-		dst.w = Short4(0x1000);
-	}
-
-	void PixelRoutine::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
-	{
-		Float4 uw = u;
-		Float4 vw = v;
-		Float4 sw = s;
-
-		if(project)
-		{
-			uw *= Rcp_pp(s);
-			vw *= Rcp_pp(s);
-		}
-
-		if(state.interpolant[2 + coordinate].component & 0x01)
-		{
-			uw *= Float4(0x1000);
-			uw = Max(uw, Float4(-0x8000));
-			uw = Min(uw, Float4(0x7FFF));
-			dst.x = RoundShort4(uw);
-		}
-		else
-		{
-			dst.x = Short4(0x0000);
-		}
-
-		if(state.interpolant[2 + coordinate].component & 0x02)
-		{
-			vw *= Float4(0x1000);
-			vw = Max(vw, Float4(-0x8000));
-			vw = Min(vw, Float4(0x7FFF));
-			dst.y = RoundShort4(vw);
-		}
-		else
-		{
-			dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-		}
-		
-		if(state.interpolant[2 + coordinate].component & 0x04)
-		{
-			sw *= Float4(0x1000);
-			sw = Max(sw, Float4(-0x8000));
-			sw = Min(sw, Float4(0x7FFF));
-			dst.z = RoundShort4(sw);
-		}
-		else
-		{
-			dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
-		}
-	}
-
-	void PixelRoutine::TEXDP3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
-	{
-		TEXM3X3PAD(r, u, v, s, src, 0, false);
-
-		Short4 t0 = RoundShort4(r.u_ * Float4(0x1000));
-
-		dst.x = t0;
-		dst.y = t0;
-		dst.z = t0;
-		dst.w = t0;
-	}
-
-	void PixelRoutine::TEXDP3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
-	{
-		TEXM3X3PAD(r, u, v, s, src0, 0, false);
-
-		r.v_ = Float4(0.0f);
-		r.w_ = Float4(0.0f);
-
-		sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
-	}
-
-	void PixelRoutine::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
-	{
-		Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
-		           SignMask(CmpNLT(v, Float4(0.0f))) &
-		           SignMask(CmpNLT(s, Float4(0.0f)));
-
-		for(unsigned int q = 0; q < state.multiSample; q++)
-		{
-			cMask[q] &= kill;
-		}
-	}
-
-	void PixelRoutine::TEXKILL(Int cMask[4], Vector4s &src)
-	{
-		Short4 test = src.x | src.y | src.z;
-		Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
-
-		for(unsigned int q = 0; q < state.multiSample; q++)
-		{
-			cMask[q] &= kill;
-		}
-	}
-
-	void PixelRoutine::TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
-	{
-		sampleTexture(r, dst, sampler, u, v, s, s, project);
-	}
-
-	void PixelRoutine::TEXLD(Registers &r, Vector4s &dst, Vector4s &src, int sampler, bool project)
-	{
-		Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
-		Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
-		Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
-
-		sampleTexture(r, dst, sampler, u, v, s, s, project);
-	}
-
-	void PixelRoutine::TEXBEM(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
-	{
-		Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
-		Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
-
-		Float4 du2 = du;
-		Float4 dv2 = dv;
-
-		du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
-		dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
-		du += dv2;
-		dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
-		du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
-		dv += du2;
-
-		Float4 u_ = u + du;
-		Float4 v_ = v + dv;
-
-		sampleTexture(r, dst, stage, u_, v_, s, s);
-	}
-
-	void PixelRoutine::TEXBEML(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
-	{
-		Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
-		Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
-
-		Float4 du2 = du;
-		Float4 dv2 = dv;
-
-		du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
-		dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
-		du += dv2;
-		dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
-		du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
-		dv += du2;
-
-		Float4 u_ = u + du;
-		Float4 v_ = v + dv;
-
-		sampleTexture(r, dst, stage, u_, v_, s, s);
-
-		Short4 L;
-
-		L = src.z;
-		L = MulHigh(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4)));
-		L = L << 4;
-		L = AddSat(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4)));
-		L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
-		L = Min(L, Short4(0x1000));
-
-		dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
-		dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
-		dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
-	}
-
-	void PixelRoutine::TEXREG2AR(Registers &r, Vector4s &dst, Vector4s &src0, int stage)
-	{
-		Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
-		Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
-		Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
-
-		sampleTexture(r, dst, stage, u, v, s, s);
-	}
-
-	void PixelRoutine::TEXREG2GB(Registers &r, Vector4s &dst, Vector4s &src0, int stage)
-	{
-		Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
-		Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
-		Float4 s = v;
-
-		sampleTexture(r, dst, stage, u, v, s, s);
-	}
-
-	void PixelRoutine::TEXREG2RGB(Registers &r, Vector4s &dst, Vector4s &src0, int stage)
-	{
-		Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
-		Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
-		Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
-
-		sampleTexture(r, dst, stage, u, v, s, s);
-	}
-
-	void PixelRoutine::TEXM3X2DEPTH(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
-	{
-		TEXM3X2PAD(r, u, v, s, src, 1, signedScaling);
-
-		// z / w
-		r.u_ *= Rcp_pp(r.v_);   // FIXME: Set result to 1.0 when division by zero
-
-		r.oDepth = r.u_;
-	}
-
-	void PixelRoutine::TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
-	{
-		TEXM3X3PAD(r, u, v, s, src0, component, signedScaling);
-	}
-
-	void PixelRoutine::TEXM3X2TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
-	{
-		TEXM3X2PAD(r, u, v, s, src0, 1, signedScaling);
-
-		r.w_ = Float4(0.0f);
-
-		sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
-	}
-
-	void PixelRoutine::TEXM3X3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
-	{
-		TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);
-
-		dst.x = RoundShort4(r.u_ * Float4(0x1000));
-		dst.y = RoundShort4(r.v_ * Float4(0x1000));
-		dst.z = RoundShort4(r.w_ * Float4(0x1000));
-		dst.w = Short4(0x1000);
-	}
-
-	void PixelRoutine::TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
-	{
-		if(component == 0 || previousScaling != signedScaling)   // FIXME: Other source modifiers?
-		{
-			r.U = Float4(src0.x);
-			r.V = Float4(src0.y);
-			r.W = Float4(src0.z);
-
-			previousScaling = signedScaling;
-		}
-
-		Float4 x = r.U * u + r.V * v + r.W * s;
-
-		x *= Float4(1.0f / 0x1000);
-
-		switch(component)
-		{
-		case 0:	r.u_ = x; break;
-		case 1:	r.v_ = x; break;
-		case 2: r.w_ = x; break;
-		default: ASSERT(false);
-		}
-	}
-
-	void PixelRoutine::TEXM3X3SPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
-	{
-		TEXM3X3PAD(r, u, v, s, src0, 2, false);
-
-		Float4 E[3];   // Eye vector
-
-		E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
-		E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
-		E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
-
-		// Reflection
-		Float4 u__;
-		Float4 v__;
-		Float4 w__;
-
-		// (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
-		u__ = r.u_ * E[0];
-		v__ = r.v_ * E[1];
-		w__ = r.w_ * E[2];
-		u__ += v__ + w__;
-		u__ += u__;
-		v__ = u__;
-		w__ = u__;
-		u__ *= r.u_;
-		v__ *= r.v_;
-		w__ *= r.w_;
-		r.u_ *= r.u_;
-		r.v_ *= r.v_;
-		r.w_ *= r.w_;
-		r.u_ += r.v_ + r.w_;
-		u__ -= E[0] * r.u_;
-		v__ -= E[1] * r.u_;
-		w__ -= E[2] * r.u_;
-
-		sampleTexture(r, dst, stage,  u__, v__, w__, w__);
-	}
-
-	void PixelRoutine::TEXM3X3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
-	{
-		TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);
-
-		sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
-	}
-
-	void PixelRoutine::TEXM3X3VSPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
-	{
-		TEXM3X3PAD(r, u, v, s, src0, 2, false);
-
-		Float4 E[3];   // Eye vector
-
-		E[0] = r.vf[2 + stage - 2].w;
-		E[1] = r.vf[2 + stage - 1].w;
-		E[2] = r.vf[2 + stage - 0].w;
-
-		// Reflection
-		Float4 u__;
-		Float4 v__;
-		Float4 w__;
-
-		// (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
-		u__ = r.u_ * E[0];
-		v__ = r.v_ * E[1];
-		w__ = r.w_ * E[2];
-		u__ += v__ + w__;
-		u__ += u__;
-		v__ = u__;
-		w__ = u__;
-		u__ *= r.u_;
-		v__ *= r.v_;
-		w__ *= r.w_;
-		r.u_ *= r.u_;
-		r.v_ *= r.v_;
-		r.w_ *= r.w_;
-		r.u_ += r.v_ + r.w_;
-		u__ -= E[0] * r.u_;
-		v__ -= E[1] * r.u_;
-		w__ -= E[2] * r.u_;
-
-		sampleTexture(r, dst, stage, u__, v__, w__, w__);
-	}
-
-	void PixelRoutine::TEXDEPTH(Registers &r)
-	{
-		r.u_ = Float4(r.rs[5].x);
-		r.v_ = Float4(r.rs[5].y);
-
-		// z / w
-		r.u_ *= Rcp_pp(r.v_);   // FIXME: Set result to 1.0 when division by zero
-
-		r.oDepth = r.u_;
-	}
-
-	void PixelRoutine::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
-	{
-		{Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0;};
-		{Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0;};
-		{Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0;};
-		{Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0;};
-	}
-
-	void PixelRoutine::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
-	{
-		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0;};
-		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0;};
-		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0;};
-		{Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0;};
-	}
-
-	void PixelRoutine::BEM(Registers &r, Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
-	{
-		Short4 t0;
-		Short4 t1;
-
-		// dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
-		t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4;   // FIXME: Matrix components range? Overflow hazard.
-		t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4;   // FIXME: Matrix components range? Overflow hazard.
-		t0 = AddSat(t0, t1);
-		t0 = AddSat(t0, src0.x);
-		dst.x = t0;
-
-		// dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
-		t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4;   // FIXME: Matrix components range? Overflow hazard.
-		t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4;   // FIXME: Matrix components range? Overflow hazard.
-		t0 = AddSat(t0, t1);
-		t0 = AddSat(t0, src0.y);
-		dst.y = t0;
-	}
-
-	void PixelRoutine::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
-	{
-		Vector4f row0 = fetchRegisterF(r, src1, 0);
-		Vector4f row1 = fetchRegisterF(r, src1, 1);
-
-		dst.x = dot3(src0, row0);
-		dst.y = dot3(src0, row1);
-	}
-
-	void PixelRoutine::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
-	{
-		Vector4f row0 = fetchRegisterF(r, src1, 0);
-		Vector4f row1 = fetchRegisterF(r, src1, 1);
-		Vector4f row2 = fetchRegisterF(r, src1, 2);
-
-		dst.x = dot3(src0, row0);
-		dst.y = dot3(src0, row1);
-		dst.z = dot3(src0, row2);
-	}
-
-	void PixelRoutine::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
-	{
-		Vector4f row0 = fetchRegisterF(r, src1, 0);
-		Vector4f row1 = fetchRegisterF(r, src1, 1);
-		Vector4f row2 = fetchRegisterF(r, src1, 2);
-		Vector4f row3 = fetchRegisterF(r, src1, 3);
-
-		dst.x = dot3(src0, row0);
-		dst.y = dot3(src0, row1);
-		dst.z = dot3(src0, row2);
-		dst.w = dot3(src0, row3);
-	}
-
-	void PixelRoutine::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
-	{
-		Vector4f row0 = fetchRegisterF(r, src1, 0);
-		Vector4f row1 = fetchRegisterF(r, src1, 1);
-		Vector4f row2 = fetchRegisterF(r, src1, 2);
-
-		dst.x = dot4(src0, row0);
-		dst.y = dot4(src0, row1);
-		dst.z = dot4(src0, row2);
-	}
-
-	void PixelRoutine::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
-	{
-		Vector4f row0 = fetchRegisterF(r, src1, 0);
-		Vector4f row1 = fetchRegisterF(r, src1, 1);
-		Vector4f row2 = fetchRegisterF(r, src1, 2);
-		Vector4f row3 = fetchRegisterF(r, src1, 3);
-
-		dst.x = dot4(src0, row0);
-		dst.y = dot4(src0, row1);
-		dst.z = dot4(src0, row2);
-		dst.w = dot4(src0, row3);
-	}
-
-	void PixelRoutine::TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
-	{
-		Vector4f tmp;
-		sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias);	
-
-		dst.x = tmp[(src1.swizzle >> 0) & 0x3];
-		dst.y = tmp[(src1.swizzle >> 2) & 0x3];
-		dst.z = tmp[(src1.swizzle >> 4) & 0x3];
-		dst.w = tmp[(src1.swizzle >> 6) & 0x3];
-	}
-	
-	void PixelRoutine::TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2,  Vector4f &src3, bool project, bool bias)
-	{
-		Vector4f tmp;
-		sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, bias, true);
-
-		dst.x = tmp[(src1.swizzle >> 0) & 0x3];
-		dst.y = tmp[(src1.swizzle >> 2) & 0x3];
-		dst.z = tmp[(src1.swizzle >> 4) & 0x3];
-		dst.w = tmp[(src1.swizzle >> 6) & 0x3];
-	}
-	
-	void PixelRoutine::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
-	{
-		Vector4f tmp;
-		sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias, false, true);
-
-		dst.x = tmp[(src1.swizzle >> 0) & 0x3];
-		dst.y = tmp[(src1.swizzle >> 2) & 0x3];
-		dst.z = tmp[(src1.swizzle >> 4) & 0x3];
-		dst.w = tmp[(src1.swizzle >> 6) & 0x3];
-	}
-
-	void PixelRoutine::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
-	{
-		Int kill = -1;
-		
-		if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
-		if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
-		if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
-		if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
-
-		// FIXME: Dynamic branching affects TEXKILL?
-	//	if(shader->containsDynamicBranching())
-	//	{
-	//		kill = ~SignMask(enableMask(r));
-	//	}
-
-		for(unsigned int q = 0; q < state.multiSample; q++)
-		{
-			cMask[q] &= kill;
-		}
-
-		// FIXME: Branch to end of shader if all killed?
-	}
-
-	void PixelRoutine::DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction)
-	{
-		Int kill = 0;
-		
-		if(shader->containsDynamicBranching())
-		{
-			kill = ~SignMask(enableMask(r, instruction));
-		}
-		
-		for(unsigned int q = 0; q < state.multiSample; q++)
-		{
-			cMask[q] &= kill;
-		}
-
-		// FIXME: Branch to end of shader if all killed?
-	}
-
-	void PixelRoutine::DFDX(Vector4f &dst, Vector4f &src)
-	{
-		dst.x = src.x.yyww - src.x.xxzz;
-		dst.y = src.y.yyww - src.y.xxzz;
-		dst.z = src.z.yyww - src.z.xxzz;
-		dst.w = src.w.yyww - src.w.xxzz;
-	}
-
-	void PixelRoutine::DFDY(Vector4f &dst, Vector4f &src)
-	{
-		dst.x = src.x.zwzw - src.x.xyxy;
-		dst.y = src.y.zwzw - src.y.xyxy;
-		dst.z = src.z.zwzw - src.z.xyxy;
-		dst.w = src.w.zwzw - src.w.xyxy;
-	}
-
-	void PixelRoutine::FWIDTH(Vector4f &dst, Vector4f &src)
-	{
-		// abs(dFdx(src)) + abs(dFdy(src));
-		dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
-		dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
-		dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
-		dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
-	}
-
-	void PixelRoutine::BREAK(Registers &r)
-	{
-		llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
-
-		if(breakDepth == 0)
-		{
-			r.enableIndex = r.enableIndex - breakDepth;
-			Nucleus::createBr(endBlock);
-		}
-		else
-		{
-			r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
-			Bool allBreak = SignMask(r.enableBreak) == 0x0;
-
-			r.enableIndex = r.enableIndex - breakDepth;
-			branch(allBreak, endBlock, deadBlock);
-		}
-
-		Nucleus::setInsertBlock(deadBlock);
-		r.enableIndex = r.enableIndex + breakDepth;
-	}
-
-	void PixelRoutine::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
-	{
-		Int4 condition;
-
-		switch(control)
-		{
-		case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);	break;
-		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);		break;
-		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);	break;
-		case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);	break;
-		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);	break;
-		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);		break;
-		default:
-			ASSERT(false);
-		}
-
-		BREAK(r, condition);
-	}
-
-	void PixelRoutine::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
-	{
-		Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
-
-		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
-		{
-			condition = ~condition;
-		}
-
-		BREAK(r, condition);
-	}
-
-	void PixelRoutine::BREAK(Registers &r, Int4 &condition)
-	{
-		condition &= r.enableStack[r.enableIndex];
-
-		llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
-
-		r.enableBreak = r.enableBreak & ~condition;
-		Bool allBreak = SignMask(r.enableBreak) == 0x0;
-
-		r.enableIndex = r.enableIndex - breakDepth;
-		branch(allBreak, endBlock, continueBlock);
-
-		Nucleus::setInsertBlock(continueBlock);
-		r.enableIndex = r.enableIndex + breakDepth;
-	}
-
-	void PixelRoutine::CONTINUE(Registers &r)
-	{
-		r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
-	}
-
-	void PixelRoutine::TEST()
-	{
-		whileTest = true;
-	}
-
-	void PixelRoutine::CALL(Registers &r, int labelIndex, int callSiteIndex)
-	{
-		if(!labelBlock[labelIndex])
-		{
-			labelBlock[labelIndex] = Nucleus::createBasicBlock();
-		}
-
-		if(callRetBlock[labelIndex].size() > 1)
-		{
-			r.callStack[r.stackIndex++] = UInt(callSiteIndex);
-		}
-
-		Int4 restoreLeave = r.enableLeave;
-
-		Nucleus::createBr(labelBlock[labelIndex]);
-		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
-
-		r.enableLeave = restoreLeave;
-	}
-
-	void PixelRoutine::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
-	{
-		if(src.type == Shader::PARAMETER_CONSTBOOL)
-		{
-			CALLNZb(r, labelIndex, callSiteIndex, src);
-		}
-		else if(src.type == Shader::PARAMETER_PREDICATE)
-		{
-			CALLNZp(r, labelIndex, callSiteIndex, src);
-		}
-		else ASSERT(false);
-	}
-
-	void PixelRoutine::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
-	{
-		Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0));   // FIXME
-		
-		if(boolRegister.modifier == Shader::MODIFIER_NOT)
-		{
-			condition = !condition;	
-		}
-
-		if(!labelBlock[labelIndex])
-		{
-			labelBlock[labelIndex] = Nucleus::createBasicBlock();
-		}
-
-		if(callRetBlock[labelIndex].size() > 1)
-		{
-			r.callStack[r.stackIndex++] = UInt(callSiteIndex);
-		}
-
-		Int4 restoreLeave = r.enableLeave;
-
-		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
-		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
-
-		r.enableLeave = restoreLeave;
-	}
-
-	void PixelRoutine::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
-	{
-		Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
-
-		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
-		{
-			condition = ~condition;
-		}
-
-		condition &= r.enableStack[r.enableIndex];
-
-		if(!labelBlock[labelIndex])
-		{
-			labelBlock[labelIndex] = Nucleus::createBasicBlock();
-		}
-
-		if(callRetBlock[labelIndex].size() > 1)
-		{
-			r.callStack[r.stackIndex++] = UInt(callSiteIndex);
-		}
-
-		r.enableIndex++;
-		r.enableStack[r.enableIndex] = condition;
-		Int4 restoreLeave = r.enableLeave;
-
-		Bool notAllFalse = SignMask(condition) != 0;
-		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
-		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
-
-		r.enableIndex--;
-		r.enableLeave = restoreLeave;
-	}
-
-	void PixelRoutine::ELSE(Registers &r)
-	{
-		ifDepth--;
-
-		llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
-		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
-
-		if(isConditionalIf[ifDepth])
-		{
-			Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
-			Bool notAllFalse = SignMask(condition) != 0;
-
-			branch(notAllFalse, falseBlock, endBlock);
-
-			r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
-		}
-		else
-		{
-			Nucleus::createBr(endBlock);
-			Nucleus::setInsertBlock(falseBlock);
-		}
-
-		ifFalseBlock[ifDepth] = endBlock;
-
-		ifDepth++;
-	}
-
-	void PixelRoutine::ENDIF(Registers &r)
-	{
-		ifDepth--;
-
-		llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
-
-		Nucleus::createBr(endBlock);
-		Nucleus::setInsertBlock(endBlock);
-
-		if(isConditionalIf[ifDepth])
-		{
-			breakDepth--;
-			r.enableIndex--;
-		}
-	}
-
-	void PixelRoutine::ENDLOOP(Registers &r)
-	{
-		loopRepDepth--;
-
-		r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
-
-		llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
-		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
-
-		Nucleus::createBr(testBlock);
-		Nucleus::setInsertBlock(endBlock);
-
-		r.loopDepth--;
-		r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-	}
-
-	void PixelRoutine::ENDREP(Registers &r)
-	{
-		loopRepDepth--;
-
-		llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
-		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
-
-		Nucleus::createBr(testBlock);
-		Nucleus::setInsertBlock(endBlock);
-
-		r.loopDepth--;
-		r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-	}
-
-	void PixelRoutine::ENDWHILE(Registers &r)
-	{
-		loopRepDepth--;
-
-		llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
-		llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
-
-		Nucleus::createBr(testBlock);
-		Nucleus::setInsertBlock(endBlock);
-
-		r.enableIndex--;
-		r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-		whileTest = false;
-	}
-
-	void PixelRoutine::IF(Registers &r, const Src &src)
-	{
-		if(src.type == Shader::PARAMETER_CONSTBOOL)
-		{
-			IFb(r, src);
-		}
-		else if(src.type == Shader::PARAMETER_PREDICATE)
-		{
-			IFp(r, src);
-		}
-		else
-		{
-			Int4 condition = As<Int4>(fetchRegisterF(r, src).x);
-			IF(r, condition);
-		}
-	}
-
-	void PixelRoutine::IFb(Registers &r, const Src &boolRegister)
-	{
-		ASSERT(ifDepth < 24 + 4);
-
-		Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0));   // FIXME
-
-		if(boolRegister.modifier == Shader::MODIFIER_NOT)
-		{
-			condition = !condition;
-		}
-
-		llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
-
-		branch(condition, trueBlock, falseBlock);
-
-		isConditionalIf[ifDepth] = false;
-		ifFalseBlock[ifDepth] = falseBlock;
-
-		ifDepth++;
-	}
-
-	void PixelRoutine::IFp(Registers &r, const Src &predicateRegister)
-	{
-		Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
-
-		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
-		{
-			condition = ~condition;
-		}
-
-		IF(r, condition);
-	}
-
-	void PixelRoutine::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
-	{
-		Int4 condition;
-
-		switch(control)
-		{
-		case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);	break;
-		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);		break;
-		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);	break;
-		case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);	break;
-		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);	break;
-		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);		break;
-		default:
-			ASSERT(false);
-		}
-
-		IF(r, condition);
-	}
-
-	void PixelRoutine::IF(Registers &r, Int4 &condition)
-	{
-		condition &= r.enableStack[r.enableIndex];
-
-		r.enableIndex++;
-		r.enableStack[r.enableIndex] = condition;
-
-		llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
-
-		Bool notAllFalse = SignMask(condition) != 0;
-
-		branch(notAllFalse, trueBlock, falseBlock);
-
-		isConditionalIf[ifDepth] = true;
-		ifFalseBlock[ifDepth] = falseBlock;
-
-		ifDepth++;
-		breakDepth++;
-	}
-
-	void PixelRoutine::LABEL(int labelIndex)
-	{
-		if(!labelBlock[labelIndex])
-		{
-			labelBlock[labelIndex] = Nucleus::createBasicBlock();
-		}
-
-		Nucleus::setInsertBlock(labelBlock[labelIndex]);
-		currentLabel = labelIndex;
-	}
-
-	void PixelRoutine::LOOP(Registers &r, const Src &integerRegister)
-	{
-		r.loopDepth++;
-
-		r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][0]));
-		r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][1]));
-		r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][2]));
-
-	//	If(r.increment[r.loopDepth] == 0)
-	//	{
-	//		r.increment[r.loopDepth] = 1;
-	//	}
-
-		llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
-
-		loopRepTestBlock[loopRepDepth] = testBlock;
-		loopRepEndBlock[loopRepDepth] = endBlock;
-
-		// FIXME: jump(testBlock)
-		Nucleus::createBr(testBlock);
-		Nucleus::setInsertBlock(testBlock);
-
-		branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
-		Nucleus::setInsertBlock(loopBlock);
-
-		r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
-		
-		loopRepDepth++;
-		breakDepth = 0;
-	}
-
-	void PixelRoutine::REP(Registers &r, const Src &integerRegister)
-	{
-		r.loopDepth++;
-
-		r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][0]));
-		r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
-
-		llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
-
-		loopRepTestBlock[loopRepDepth] = testBlock;
-		loopRepEndBlock[loopRepDepth] = endBlock;
-
-		// FIXME: jump(testBlock)
-		Nucleus::createBr(testBlock);
-		Nucleus::setInsertBlock(testBlock);
-
-		branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
-		Nucleus::setInsertBlock(loopBlock);
-
-		r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
-
-		loopRepDepth++;
-		breakDepth = 0;
-	}
-
-	void PixelRoutine::WHILE(Registers &r, const Src &temporaryRegister)
-	{
-		r.enableIndex++;
-
-		llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
-		llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
-		
-		loopRepTestBlock[loopRepDepth] = testBlock;
-		loopRepEndBlock[loopRepDepth] = endBlock;
-
-		Int4 restoreBreak = r.enableBreak;
-		Int4 restoreContinue = r.enableContinue;
-
-		// FIXME: jump(testBlock)
-		Nucleus::createBr(testBlock);
-		Nucleus::setInsertBlock(testBlock);
-		r.enableContinue = restoreContinue;
-
-		const Vector4f &src = fetchRegisterF(r, temporaryRegister);
-		Int4 condition = As<Int4>(src.x);
-		condition &= r.enableStack[r.enableIndex - 1];
-		r.enableStack[r.enableIndex] = condition;
-
-		Bool notAllFalse = SignMask(condition) != 0;
-		branch(notAllFalse, loopBlock, endBlock);
-		
-		Nucleus::setInsertBlock(endBlock);
-		r.enableBreak = restoreBreak;
-		
-		Nucleus::setInsertBlock(loopBlock);
-
-		loopRepDepth++;
-		breakDepth = 0;
-	}
-
-	void PixelRoutine::RET(Registers &r)
-	{
-		if(currentLabel == -1)
-		{
-			returnBlock = Nucleus::createBasicBlock();
-			Nucleus::createBr(returnBlock);
-		}
-		else
-		{
-			llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
-
-			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
-			{
-				// FIXME: Encapsulate
-				UInt index = r.callStack[--r.stackIndex];
- 
-				llvm::Value *value = index.loadValue();
-				llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
-
-				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
-				{
-					Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
-				}
-			}
-			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
-			{
-				Nucleus::createBr(callRetBlock[currentLabel][0]);
-			}
-			else   // Function isn't called
-			{
-				Nucleus::createBr(unreachableBlock);
-			}
-
-			Nucleus::setInsertBlock(unreachableBlock);
-			Nucleus::createUnreachable();
-		}
-	}
-
-	void PixelRoutine::LEAVE(Registers &r)
-	{
-		r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
-
-		// FIXME: Return from function if all instances left
-		// FIXME: Use enableLeave in other control-flow constructs
-	}
-	
-	void PixelRoutine::writeDestination(Registers &r, Vector4s &d, const Dst &dst)
-	{
-		switch(dst.type)
-		{
-		case Shader::PARAMETER_TEMP:
-			if(dst.mask & 0x1) r.rs[dst.index].x = d.x;
-			if(dst.mask & 0x2) r.rs[dst.index].y = d.y;
-			if(dst.mask & 0x4) r.rs[dst.index].z = d.z;
-			if(dst.mask & 0x8) r.rs[dst.index].w = d.w;
-			break;
-		case Shader::PARAMETER_INPUT:
-			if(dst.mask & 0x1) r.vs[dst.index].x = d.x;
-			if(dst.mask & 0x2) r.vs[dst.index].y = d.y;
-			if(dst.mask & 0x4) r.vs[dst.index].z = d.z;
-			if(dst.mask & 0x8) r.vs[dst.index].w = d.w;
-			break;
-		case Shader::PARAMETER_CONST:			ASSERT(false);	break;
-		case Shader::PARAMETER_TEXTURE:
-			if(dst.mask & 0x1) r.ts[dst.index].x = d.x;
-			if(dst.mask & 0x2) r.ts[dst.index].y = d.y;
-			if(dst.mask & 0x4) r.ts[dst.index].z = d.z;
-			if(dst.mask & 0x8) r.ts[dst.index].w = d.w;
-			break;
-		case Shader::PARAMETER_COLOROUT:
-			if(dst.mask & 0x1) r.vs[dst.index].x = d.x;
-			if(dst.mask & 0x2) r.vs[dst.index].y = d.y;
-			if(dst.mask & 0x4) r.vs[dst.index].z = d.z;
-			if(dst.mask & 0x8) r.vs[dst.index].w = d.w;
-			break;
-		default:
-			ASSERT(false);
-		}
-	}
-
-	Vector4s PixelRoutine::fetchRegisterS(Registers &r, const Src &src)
-	{
-		Vector4s *reg;
-		int i = src.index;
-
-		Vector4s c;
-
-		if(src.type == Shader::PARAMETER_CONST)
-		{
-			c.x = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][0]));
-			c.y = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][1]));
-			c.z = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][2]));
-			c.w = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][3]));
-		}
-
-		switch(src.type)
-		{
-		case Shader::PARAMETER_TEMP:          reg = &r.rs[i]; break;
-		case Shader::PARAMETER_INPUT:         reg = &r.vs[i]; break;
-		case Shader::PARAMETER_CONST:         reg = &c;       break;
-		case Shader::PARAMETER_TEXTURE:       reg = &r.ts[i]; break;
-		case Shader::PARAMETER_VOID:          return r.rs[0]; // Dummy
-		case Shader::PARAMETER_FLOAT4LITERAL: return r.rs[0]; // Dummy
-		default:
-			ASSERT(false);
-		}
-
-		const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
-		const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
-		const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
-		const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
-
-		Vector4s mod;
-
-		switch(src.modifier)
-		{
-		case Shader::MODIFIER_NONE:
-			mod.x = x;
-			mod.y = y;
-			mod.z = z;
-			mod.w = w;
-			break;
-		case Shader::MODIFIER_BIAS:
-			mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			break;
-		case Shader::MODIFIER_BIAS_NEGATE:
-			mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
-			mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
-			mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
-			mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
-			break;
-		case Shader::MODIFIER_COMPLEMENT:
-			mod.x = SubSat(Short4(0x1000), x);
-			mod.y = SubSat(Short4(0x1000), y);
-			mod.z = SubSat(Short4(0x1000), z);
-			mod.w = SubSat(Short4(0x1000), w);
-			break;
-		case Shader::MODIFIER_NEGATE:
-			mod.x = -x;
-			mod.y = -y;
-			mod.z = -z;
-			mod.w = -w;
-			break;
-		case Shader::MODIFIER_X2:
-			mod.x = AddSat(x, x);
-			mod.y = AddSat(y, y);
-			mod.z = AddSat(z, z);
-			mod.w = AddSat(w, w);
-			break;
-		case Shader::MODIFIER_X2_NEGATE:
-			mod.x = -AddSat(x, x);
-			mod.y = -AddSat(y, y);
-			mod.z = -AddSat(z, z);
-			mod.w = -AddSat(w, w);
-			break;
-		case Shader::MODIFIER_SIGN:
-			mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
-			mod.x = AddSat(mod.x, mod.x);
-			mod.y = AddSat(mod.y, mod.y);
-			mod.z = AddSat(mod.z, mod.z);
-			mod.w = AddSat(mod.w, mod.w);
-			break;
-		case Shader::MODIFIER_SIGN_NEGATE:
-			mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
-			mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
-			mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
-			mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
-			mod.x = AddSat(mod.x, mod.x);
-			mod.y = AddSat(mod.y, mod.y);
-			mod.z = AddSat(mod.z, mod.z);
-			mod.w = AddSat(mod.w, mod.w);
-			break;
-		case Shader::MODIFIER_DZ:
-			mod.x = x;
-			mod.y = y;
-			mod.z = z;
-			mod.w = w;
-			// Projection performed by texture sampler
-			break;
-		case Shader::MODIFIER_DW:
-			mod.x = x;
-			mod.y = y;
-			mod.z = z;
-			mod.w = w;
-			// Projection performed by texture sampler
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
-		{
-			mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
-			mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
-			mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
-			mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
-		}
-
-		return mod;
-	}
-
-	Vector4f PixelRoutine::fetchRegisterF(Registers &r, const Src &src, int offset)
-	{
-		Vector4f reg;
-		int i = src.index + offset;
-
-		switch(src.type)
-		{
-		case Shader::PARAMETER_TEMP:
-			if(src.rel.type == Shader::PARAMETER_VOID)
-			{
-				reg = r.rf[i];
-			}
-			else
-			{
-				Int a = relativeAddress(r, src);
-
-				reg = r.rf[i + a];
-			}
-			break;
-		case Shader::PARAMETER_INPUT:
-			{
-				if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
-				{
-					reg = r.vf[i];
-				}
-				else if(src.rel.type == Shader::PARAMETER_LOOP)
-				{
-					Int aL = r.aL[r.loopDepth];
-
-					reg = r.vf[i + aL];
-				}
-				else
-				{
-					Int a = relativeAddress(r, src);
-					
-					reg = r.vf[i + a];
-				}
-			}
-			break;
-		case Shader::PARAMETER_CONST:
-			reg = readConstant(r, src, offset);
-			break;
-		case Shader::PARAMETER_TEXTURE:
-			reg = r.vf[2 + i];
-			break;
-		case Shader::PARAMETER_MISCTYPE:
-			if(src.index == 0)				reg = r.vPos;
-			if(src.index == 1)				reg = r.vFace;
-			break;
-		case Shader::PARAMETER_SAMPLER:
-			if(src.rel.type == Shader::PARAMETER_VOID)
-			{
-				reg.x = As<Float4>(Int4(i));
-			}
-			else if(src.rel.type == Shader::PARAMETER_TEMP)
-			{
-				reg.x = As<Float4>(Int4(i) + RoundInt(r.rf[src.rel.index].x));
-			}
-			return reg;
-		case Shader::PARAMETER_PREDICATE:	return reg;   // Dummy
-		case Shader::PARAMETER_VOID:		return reg;   // Dummy
-		case Shader::PARAMETER_FLOAT4LITERAL:
-			reg.x = Float4(src.value[0]);
-			reg.y = Float4(src.value[1]);
-			reg.z = Float4(src.value[2]);
-			reg.w = Float4(src.value[3]);
-			break;
-		case Shader::PARAMETER_CONSTINT:	return reg;   // Dummy
-		case Shader::PARAMETER_CONSTBOOL:	return reg;   // Dummy
-		case Shader::PARAMETER_LOOP:		return reg;   // Dummy
-		case Shader::PARAMETER_COLOROUT:
-			reg = r.oC[i];
-			break;
-		case Shader::PARAMETER_DEPTHOUT:
-			reg.x = r.oDepth;
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
-		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
-		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
-		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
-
-		Vector4f mod;
-
-		switch(src.modifier)
-		{
-		case Shader::MODIFIER_NONE:
-			mod.x = x;
-			mod.y = y;
-			mod.z = z;
-			mod.w = w;
-			break;
-		case Shader::MODIFIER_NEGATE:
-			mod.x = -x;
-			mod.y = -y;
-			mod.z = -z;
-			mod.w = -w;
-			break;
-		case Shader::MODIFIER_ABS:
-			mod.x = Abs(x);
-			mod.y = Abs(y);
-			mod.z = Abs(z);
-			mod.w = Abs(w);
-			break;
-		case Shader::MODIFIER_ABS_NEGATE:
-			mod.x = -Abs(x);
-			mod.y = -Abs(y);
-			mod.z = -Abs(z);
-			mod.w = -Abs(w);
-			break;
-		case Shader::MODIFIER_NOT:
-			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
-			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
-			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
-			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
-			break;
-		default:
-			ASSERT(false);
-		}
-
-		return mod;
-	}
-
-	Vector4f PixelRoutine::readConstant(Registers &r, const Src &src, int offset)
-	{
-		Vector4f c;
-
-		int i = src.index + offset;
-
-		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
-		{
-			c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]));
-
-			c.x = c.x.xxxx;
-			c.y = c.y.yyyy;
-			c.z = c.z.zzzz;
-			c.w = c.w.wwww;
-
-			if(shader->containsDefineInstruction())   // Constant may be known at compile time
-			{
-				for(size_t j = 0; j < shader->getLength(); j++)
-				{
-					const Shader::Instruction &instruction = *shader->getInstruction(j);
-
-					if(instruction.opcode == Shader::OPCODE_DEF)
-					{
-						if(instruction.dst.index == i)
-						{
-							c.x = Float4(instruction.src[0].value[0]);
-							c.y = Float4(instruction.src[0].value[1]);
-							c.z = Float4(instruction.src[0].value[2]);
-							c.w = Float4(instruction.src[0].value[3]);
-
-							break;
-						}
-					}
-				}
-			}
-		}
-		else if(src.rel.type == Shader::PARAMETER_LOOP)
-		{
-			Int loopCounter = r.aL[r.loopDepth];
-
-			c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]) + loopCounter * 16);
-
-			c.x = c.x.xxxx;
-			c.y = c.y.yyyy;
-			c.z = c.z.zzzz;
-			c.w = c.w.wwww;
-		}
-		else
-		{
-			Int a = relativeAddress(r, src);
-			
-			c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]) + a * 16);
-
-			c.x = c.x.xxxx;
-			c.y = c.y.yyyy;
-			c.z = c.z.zzzz;
-			c.w = c.w.wwww;
-		}
-
-		return c;
-	}
-
-	Int PixelRoutine::relativeAddress(Registers &r, const Shader::Parameter &var)
-	{
-		ASSERT(var.rel.deterministic);
-
-		if(var.rel.type == Shader::PARAMETER_TEMP)
-		{
-			return RoundInt(Extract(r.rf[var.rel.index].x, 0)) * var.rel.scale;
-		}
-		else if(var.rel.type == Shader::PARAMETER_INPUT)
-		{
-			return RoundInt(Extract(r.vf[var.rel.index].x, 0)) * var.rel.scale;
-		}
-		else if(var.rel.type == Shader::PARAMETER_OUTPUT)
-		{
-			return RoundInt(Extract(r.oC[var.rel.index].x, 0)) * var.rel.scale;
-		}
-		else if(var.rel.type == Shader::PARAMETER_CONST)
-		{
-			RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[var.rel.index]));
-
-			return RoundInt(Extract(c, 0)) * var.rel.scale;
-		}
-		else ASSERT(false);
-
-		return 0;
-	}
-
-	Int4 PixelRoutine::enableMask(Registers &r, const Shader::Instruction *instruction)
-	{
-		Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
-		
-		if(!whileTest)
-		{
-			if(shader->containsBreakInstruction() && instruction->analysisBreak)
-			{
-				enable &= r.enableBreak;
-			}
-
-			if(shader->containsContinueInstruction() && instruction->analysisContinue)
-			{
-				enable &= r.enableContinue;
-			}
-
-			if(shader->containsLeaveInstruction() && instruction->analysisLeave)
-			{
-				enable &= r.enableLeave;
-			}
-		}
-
-		return enable;
-	}
-
 	bool PixelRoutine::colorUsed()
 	{
 		return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
 	}
-
-	unsigned short PixelRoutine::shaderVersion() const
-	{
-		return shader ? shader->getVersion() : 0x0000;
-	}
-
-	bool PixelRoutine::interpolateZ() const
-	{
-		return state.depthTestActive || state.pixelFogActive() || (shader && shader->vPosDeclared && fullPixelPositionRegister);
-	}
-
-	bool PixelRoutine::interpolateW() const
-	{
-		return state.perspective || (shader && shader->vPosDeclared && fullPixelPositionRegister);
-	}
 }
diff --git a/src/Shader/PixelRoutine.hpp b/src/Shader/PixelRoutine.hpp
index dd645f0..fd85a34 100644
--- a/src/Shader/PixelRoutine.hpp
+++ b/src/Shader/PixelRoutine.hpp
@@ -12,171 +12,62 @@
 #ifndef sw_PixelRoutine_hpp
 #define sw_PixelRoutine_hpp
 
-#include "Rasterizer.hpp"
-#include "ShaderCore.hpp"
-#include "PixelShader.hpp"
-
-#include "Types.hpp"
+#include "QuadRasterizer.hpp"
 
 namespace sw
 {
-	extern bool forceClearRegisters;
-
 	class PixelShader;
 	class SamplerCore;
 
-	class PixelRoutine : public Rasterizer, public ShaderCore
+	class PixelRoutine : public sw::QuadRasterizer, public ShaderCore
 	{
 		friend class PixelProcessor;   // FIXME
 
 	public:
 		PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader);
 
-		~PixelRoutine();
+		virtual ~PixelRoutine();
 
 	protected:
-		struct Registers
+		struct Registers : public QuadRasterizer::Registers
 		{
-			Registers(const PixelShader *shader) :
-				current(rs[0]), diffuse(vs[0]), specular(vs[1]),
-				rf(shader && shader->dynamicallyIndexedTemporaries),
-				vf(shader && shader->dynamicallyIndexedInput)
-			{
-				if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)
-				{
-					for(int i = 0; i < 10; i++)
-					{
-						vf[i].x = Float4(0.0f);
-						vf[i].y = Float4(0.0f);
-						vf[i].z = Float4(0.0f);
-						vf[i].w = Float4(0.0f);
-					}
-				}
+			Registers(const PixelShader *shader);
 
-				loopDepth = -1;
-				enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-				
-				if(shader && shader->containsBreakInstruction())
-				{
-					enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-				}
+			Float4 z[4]; // Multisampled z
+			Float4 w;    // Used as is
+			Float4 rhw;  // Reciprocal w
 
-				if(shader && shader->containsContinueInstruction())
-				{
-					enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-				}
+			RegisterArray<4096> rf; // Computation registers
+			RegisterArray<10> vf;   // Varying registers
 
-				occlusion = 0;
-				
-				#if PERF_PROFILE
-					for(int i = 0; i < PERF_TIMERS; i++)
-					{
-						cycles[i] = 0;
-					}
-				#endif
-			}
-
-			Pointer<Byte> constants;
-
-			Pointer<Byte> primitive;
-			Int cluster;
-			Pointer<Byte> data;
-
-			Float4 z[4];
-			Float4 w;
-			Float4 rhw;
-
-			Float4 Dz[4];
-			Float4 Dw;
-			Float4 Dv[10][4];
-			Float4 Df;
-
-			Vector4s &current;
-			Vector4s &diffuse;
-			Vector4s &specular;
-
-			Vector4s rs[6];
-			Vector4s vs[2];
-			Vector4s ts[6];
-
-			RegisterArray<4096> rf;
-			RegisterArray<10> vf;
-
-			Vector4f vPos;
-			Vector4f vFace;
-
+			// Outputs
 			Vector4f oC[4];
 			Float4 oDepth;
-
-			Vector4f p0;
-			Array<Int, 4> aL;
-
-			Array<Int, 4> increment;
-			Array<Int, 4> iteration;
-
-			Int loopDepth;
-			Int stackIndex;   // FIXME: Inc/decrement callStack
-			Array<UInt, 16> callStack;
-
-			Int enableIndex;
-			Array<Int4, 1 + 24> enableStack;
-			Int4 enableBreak;
-			Int4 enableContinue;
-			Int4 enableLeave;
-
-			// bem(l) offsets and luminance
-			Float4 du;
-			Float4 dv;
-			Short4 L;
-
-			// texm3x3 temporaries
-			Float4 u_;   // FIXME
-			Float4 v_;   // FIXME
-			Float4 w_;   // FIXME
-			Float4 U;   // FIXME
-			Float4 V;   // FIXME
-			Float4 W;   // FIXME
-
-			UInt occlusion;
-
-			#if PERF_PROFILE
-				Long cycles[PERF_TIMERS];
-			#endif
 		};
 
-		typedef Shader::DestinationParameter Dst;
+
 		typedef Shader::SourceParameter Src;
-		typedef Shader::Control Control;
+		typedef Shader::DestinationParameter Dst;
 
-		void quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y);
+		virtual void setBuiltins(Registers &r, Int &x, Int &y, Float4(&z)[4], Float4 &w) = 0;
+		virtual void applyShader(Registers &r, Int cMask[4]) = 0;
+		virtual Bool alphaTest(Registers &r, Int cMask[4]) = 0;
+		virtual void rasterOperation(Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) = 0;
 
-		Float4 interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
+		virtual void quad(QuadRasterizer::Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y);
+
 		Float4 interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
 		void stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask);
 		void stencilTest(Registers &r, Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW);
 		void stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask);
 		void stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW);
 		Bool depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask);
-		void blendTexture(Registers &r, Vector4s &temp, Vector4s &texture, int stage);
 		void alphaTest(Registers &r, Int &aMask, Short4 &alpha);
 		void alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha);
-		Bool alphaTest(Registers &r, Int cMask[4], Vector4s &current);
-		Bool alphaTest(Registers &r, Int cMask[4], Vector4f &c0);
-		void fogBlend(Registers &r, Vector4s &current, Float4 &fog, Float4 &z, Float4 &rhw);
 		void fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw);
 		void pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw);
-		void specularPixel(Vector4s &current, Vector4s &specular);
 
-		void sampleTexture(Registers &r, Vector4s &c, int coordinates, int sampler, bool project = false);
-		void sampleTexture(Registers &r, Vector4s &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project = false, bool bias = false);
-		void sampleTexture(Registers &r, Vector4s &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false);
-		void sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false);
-		void sampleTexture(Registers &r, Vector4f &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project = false, bool bias = false, bool gradients = false, bool lodProvided = false);
-	
 		// Raster operations
-		void clampColor(Vector4f oC[4]);
-		void rasterOperation(Vector4s &current, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
-		void rasterOperation(Vector4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]);
 		void blendFactor(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive);
 		void blendFactorAlpha(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive);
 		void readPixel(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x, Vector4s &pixel);
@@ -190,134 +81,16 @@
 		void writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask);
 		void writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask);
 
-		void ps_1_x(Registers &r, Int cMask[4]);
-		void ps_2_x(Registers &r, Int cMask[4]);
-
-		Short4 convertFixed12(RValue<Float4> cf);
-		void convertFixed12(Vector4s &cs, Vector4f &cf);
-		Float4 convertSigned12(Short4 &cs);
-		void convertSigned12(Vector4f &cf, Vector4s &cs);
-		Float4 convertUnsigned16(UShort4 cs);
 		UShort4 convertFixed16(Float4 &cf, bool saturate = true);
-		void convertFixed16(Vector4s &cs, Vector4f &cf, bool saturate = true);
 		void sRGBtoLinear16_12_16(Registers &r, Vector4s &c);
 		void sRGBtoLinear12_16(Registers &r, Vector4s &c);
 		void linearToSRGB16_12_16(Registers &r, Vector4s &c);
 		void linearToSRGB12_16(Registers &r, Vector4s &c);
 		Float4 sRGBtoLinear(const Float4 &x);
-		Float4 linearToSRGB(const Float4 &x);
-
-		// ps_1_x instructions
-		void MOV(Vector4s &dst, Vector4s &src0);
-		void ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1);
-		void SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1);
-		void MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
-		void MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1);
-		void DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1);
-		void DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1);
-		void LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
-		void TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate);
-		void TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project);
-		void TEXDP3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src);
-		void TEXDP3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0);
-		void TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s);
-		void TEXKILL(Int cMask[4], Vector4s &dst);
-		void TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, bool project);
-		void TEXLD(Registers &r, Vector4s &dst, Vector4s &src, int stage, bool project);
-		void TEXBEM(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage);
-		void TEXBEML(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage);
-		void TEXREG2AR(Registers &r, Vector4s &dst, Vector4s &src0, int stage);
-		void TEXREG2GB(Registers &r, Vector4s &dst, Vector4s &src0, int stage);
-		void TEXREG2RGB(Registers &r, Vector4s &dst, Vector4s &src0, int stage);
-		void TEXM3X2DEPTH(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling);
-		void TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling);
-		void TEXM3X2TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling);
-		void TEXM3X3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling);
-		void TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling);
-		void TEXM3X3SPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1);
-		void TEXM3X3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool singedScaling);
-		void TEXM3X3VSPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0);
-		void TEXDEPTH(Registers &r);
-		void CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
-		void CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2);
-		void BEM(Registers &r, Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage);
-
-		// ps_2_x instructions
-		void M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
-		void M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
-		void M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
-		void M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
-		void M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1);
-		void TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias);
-		void TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2,  Vector4f &src3, bool project, bool bias);
-		void TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias);
-		void TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask);
-		void DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction);
-		void DFDX(Vector4f &dst, Vector4f &src);
-		void DFDY(Vector4f &dst, Vector4f &src);
-		void FWIDTH(Vector4f &dst, Vector4f &src);
-		void BREAK(Registers &r);
-		void BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control);
-		void BREAKP(Registers &r, const Src &predicateRegister);
-		void BREAK(Registers &r, Int4 &condition);
-		void CONTINUE(Registers &r);
-		void TEST();
-		void CALL(Registers &r, int labelIndex, int callSiteIndex);
-		void CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src);
-		void CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister);
-		void CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister);
-		void ELSE(Registers &r);
-		void ENDIF(Registers &r);
-		void ENDLOOP(Registers &r);
-		void ENDREP(Registers &r);
-		void ENDWHILE(Registers &r);
-		void IF(Registers &r, const Src &src);
-		void IFb(Registers &r, const Src &boolRegister);
-		void IFp(Registers &r, const Src &predicateRegister);
-		void IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control);
-		void IF(Registers &r, Int4 &condition);
-		void LABEL(int labelIndex);
-		void LOOP(Registers &r, const Src &integerRegister);
-		void REP(Registers &r, const Src &integerRegister);
-		void WHILE(Registers &r, const Src &temporaryRegister);
-		void RET(Registers &r);
-		void LEAVE(Registers &r);
-
-		void writeDestination(Registers &r, Vector4s &d, const Dst &dst);
-		Vector4s fetchRegisterS(Registers &r, const Src &src);
-		Vector4f fetchRegisterF(Registers &r, const Src &src, int offset = 0);
-		Vector4f readConstant(Registers &r, const Src &src, int offset = 0);
-		Int relativeAddress(Registers &r, const Shader::Parameter &var);
-		Int4 enableMask(Registers &r, const Shader::Instruction *instruction);
 
 		bool colorUsed();
-		unsigned short shaderVersion() const;
-		bool interpolateZ() const;
-		bool interpolateW() const;
 
-		const PixelShader *const shader;
-
-	private:
 		SamplerCore *sampler[TEXTURE_IMAGE_UNITS];
-
-		bool perturbate;
-		bool luminance;
-		bool previousScaling;
-
-		int ifDepth;
-		int loopRepDepth;
-		int breakDepth;
-		int currentLabel;
-		bool whileTest;
-
-		// FIXME: Get rid of llvm::
-		llvm::BasicBlock *ifFalseBlock[24 + 24];
-		llvm::BasicBlock *loopRepTestBlock[4];
-		llvm::BasicBlock *loopRepEndBlock[4];
-		llvm::BasicBlock *labelBlock[2048];
-		std::vector<llvm::BasicBlock*> callRetBlock[2048];
-		llvm::BasicBlock *returnBlock;
-		bool isConditionalIf[24 + 24];
 	};
 }
 
diff --git a/src/SwiftShader/SwiftShader.vcxproj b/src/SwiftShader/SwiftShader.vcxproj
index 5b4dc71..5eca09b 100644
--- a/src/SwiftShader/SwiftShader.vcxproj
+++ b/src/SwiftShader/SwiftShader.vcxproj
@@ -321,6 +321,8 @@
     <ClCompile Include="..\Main\FrameBufferWin.cpp" />

     <ClCompile Include="..\Main\Register.cpp" />

     <ClCompile Include="..\Shader\Constants.cpp" />

+    <ClCompile Include="..\Shader\PixelPipeline.cpp" />

+    <ClCompile Include="..\Shader\PixelProgram.cpp" />

     <ClCompile Include="..\Shader\PixelRoutine.cpp" />

     <ClCompile Include="..\Shader\PixelShader.cpp" />

     <ClCompile Include="..\Shader\SamplerCore.cpp" />

@@ -390,6 +392,8 @@
     <ClInclude Include="..\Main\FrameBufferWin.hpp" />

     <ClInclude Include="..\Main\Register.hpp" />

     <ClInclude Include="..\Renderer\RoutineCache.hpp" />

+    <ClInclude Include="..\Shader\PixelPipeline.hpp" />

+    <ClInclude Include="..\Shader\PixelProgram.hpp" />

     <ClInclude Include="MemoryManager.hpp" />

     <ClInclude Include="..\Shader\Constants.hpp" />

     <ClInclude Include="..\Shader\PixelRoutine.hpp" />

diff --git a/src/SwiftShader/SwiftShader.vcxproj.filters b/src/SwiftShader/SwiftShader.vcxproj.filters
index b60d548..572441c 100644
--- a/src/SwiftShader/SwiftShader.vcxproj.filters
+++ b/src/SwiftShader/SwiftShader.vcxproj.filters
@@ -179,6 +179,12 @@
     <ClCompile Include="..\Main\crc.cpp">

       <Filter>Source Files\Main</Filter>

     </ClCompile>

+    <ClCompile Include="..\Shader\PixelPipeline.cpp">

+      <Filter>Source Files\Shader</Filter>

+    </ClCompile>

+    <ClCompile Include="..\Shader\PixelProgram.cpp">

+      <Filter>Source Files\Shader</Filter>

+    </ClCompile>

   </ItemGroup>

   <ItemGroup>

     <ClInclude Include="MemoryManager.hpp">

@@ -356,6 +362,12 @@
     <ClInclude Include="..\Common\SharedLibrary.hpp">

       <Filter>Header Files\Common</Filter>

     </ClInclude>

+    <ClInclude Include="..\Shader\PixelProgram.hpp">

+      <Filter>Header Files\Shader</Filter>

+    </ClInclude>

+    <ClInclude Include="..\Shader\PixelPipeline.hpp">

+      <Filter>Header Files\Shader</Filter>

+    </ClInclude>

   </ItemGroup>

   <ItemGroup>

     <None Include="SwiftShader.ini" />