Add SwiftShader dump from Feb 6 2013
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index 2d709ca..8817739 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer
 //
-// Copyright(c) 2005-2012 TransGaming Inc.
+// Copyright(c) 2005-2013 TransGaming Inc.
 //
 // All rights reserved. No part of this software may be copied, distributed, transmitted,
 // transcribed, stored in a retrieval system, translated into any human or computer
@@ -18,7 +18,7 @@
 {
 	Blitter::Blitter()
 	{
-		blitCache = new LRUCache<BlitState, Routine>(1024);
+		blitCache = new RoutineCache<BlitState>(1024);
 	}
 
 	Blitter::~Blitter()
@@ -79,12 +79,16 @@
 		case FORMAT_L8:
 			c.xyz = Float(Int(*Pointer<Byte>(element)));
 			break;
+		case FORMAT_A8:
+			c.xyz = 0.0f;
+			c.w = Float(Int(*Pointer<Byte>(element)));
+			break;
 		case FORMAT_A8R8G8B8:
 			c = Float4(*Pointer<Byte4>(element)).zyxw;
 			break;
 		case FORMAT_X8R8G8B8:
 			c = Float4(*Pointer<Byte4>(element)).zyxw;
-			c.w = Float(1.0f);
+			c.w = 1.0f;
 			break;
 		case FORMAT_A16B16G16R16:
 			c = Float4(*Pointer<UShort4>(element));
@@ -123,12 +127,12 @@
 		
 		if(!blitRoutine)
 		{
-			Function<Void, Pointer<Byte>> function;
+			Function<Void, Pointer<Byte> > function;
 			{
 				Pointer<Byte> blit(function.arg(0));
 
-				Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
-				Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
+				Pointer<Byte> source = *Pointer<Pointer<Byte> >(blit + OFFSET(BlitData,source));
+				Pointer<Byte> dest = *Pointer<Pointer<Byte> >(blit + OFFSET(BlitData,dest));
 				Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
 				Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
 
@@ -169,8 +173,8 @@
 						}
 						else   // Bilinear filtering
 						{
-							Float x0 = x - Float(0.5f);
-							Float y0 = y - Float(0.5f);
+							Float x0 = x - 0.5f;
+							Float y0 = y - 0.5f;
 
 							Int X0 = Max(Int(x0), 0);
 							Int Y0 = Max(Int(y0), 0);
@@ -202,6 +206,7 @@
 						switch(state.sourceFormat)
 						{
 						case FORMAT_L8:
+						case FORMAT_A8:
 						case FORMAT_A8R8G8B8:
 						case FORMAT_X8R8G8B8:
 							unscale = vector(255, 255, 255, 255);
@@ -224,6 +229,7 @@
 						switch(state.destFormat)
 						{
 						case FORMAT_L8:
+						case FORMAT_A8:
 						case FORMAT_A8R8G8B8:
 						case FORMAT_X8R8G8B8:
 							scale = vector(255, 255, 255, 255);
@@ -263,6 +269,9 @@
 						case FORMAT_L8:
 							*Pointer<Byte>(d) = Byte(RoundInt(Float(color.x)));
 							break;
+						case FORMAT_A8:
+							*Pointer<Byte>(d) = Byte(RoundInt(Float(color.w)));
+							break;
 						case FORMAT_A8R8G8B8:
 							{
 								UShort4 c0 = As<UShort4>(RoundShort4(color.zyxw));
@@ -274,7 +283,7 @@
 							{
 								UShort4 c0 = As<UShort4>(RoundShort4(color.zyxw));
 								Byte8 c1 = Pack(c0, c0);
-								*Pointer<UInt>(d) = UInt(As<Long>(c1)) | UInt(0xFF000000);
+								*Pointer<UInt>(d) = UInt(As<Long>(c1)) | 0xFF000000;
 							}
 							break;
 						case FORMAT_A16B16G16R16:
@@ -301,8 +310,6 @@
 
 					y += h;
 				}
-
-				Emms();   // FIXME: Not required when performing blits in the renderer engine
 			}
 
 			blitRoutine = function(L"BlitRoutine");
@@ -310,7 +317,7 @@
 			blitCache->add(state, blitRoutine);
 		}
 
-		void (__cdecl *blitFunction)(const BlitData *data) = (void(__cdecl*)(const BlitData*))blitRoutine->getEntry();
+		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
 
 		BlitData data;
 
diff --git a/src/Renderer/Blitter.hpp b/src/Renderer/Blitter.hpp
index 8d9554d..dc3bec9 100644
--- a/src/Renderer/Blitter.hpp
+++ b/src/Renderer/Blitter.hpp
@@ -13,13 +13,13 @@
 #define sw_Blitter_hpp

 

 #include "Surface.hpp"

-#include "LRUCache.hpp"

+#include "RoutineCache.hpp"

 #include "Reactor/Nucleus.hpp"

 

+#include <string.h>

+

 namespace sw

 {

-	class Routine;

-

 	class Blitter

 	{

 	public:

@@ -66,7 +66,7 @@
 			int sHeight;

 		};

 

-		LRUCache<BlitState, Routine> *blitCache;

+		RoutineCache<BlitState> *blitCache;

 	};

 }

 

diff --git a/src/Renderer/Clipper.hpp b/src/Renderer/Clipper.hpp
index de017b8..fca7b0c 100644
--- a/src/Renderer/Clipper.hpp
+++ b/src/Renderer/Clipper.hpp
@@ -1,66 +1,66 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_Clipper_hpp

-#define sw_Clipper_hpp

-

-#include "Plane.hpp"

-#include "Common/Types.hpp"

-

-namespace sw

-{

-	struct Polygon;

-	struct DrawCall;

-	struct DrawData;

-

-	class Clipper

-	{

-	public:

-		enum ClipFlags

-		{

-			CLIP_RIGHT  = 1 << 0,

-			CLIP_TOP    = 1 << 1,

-			CLIP_FAR    = 1 << 2,

-			CLIP_LEFT   = 1 << 3,

-			CLIP_BOTTOM = 1 << 4,

-			CLIP_NEAR   = 1 << 5,

-

-			CLIP_FINITE = 1 << 7,

-

-			// User-defined clipping planes

-			CLIP_PLANE0	= 1 << 8,

-			CLIP_PLANE1	= 1 << 9,

-			CLIP_PLANE2	= 1 << 10,

-			CLIP_PLANE3	= 1 << 11,

-			CLIP_PLANE4	= 1 << 12,

-			CLIP_PLANE5	= 1 << 13

-		};

-

-		Clipper();

-

-		~Clipper();

-

-		bool clip(Polygon &polygon, int clipFlagsOr, const DrawCall &draw);

-

-	private:

-		void clipNear(Polygon &polygon);

-		void clipFar(Polygon &polygon);

-		void clipLeft(Polygon &polygon, const DrawData &data);

-		void clipRight(Polygon &polygon, const DrawData &data);

-		void clipTop(Polygon &polygon, const DrawData &data);

-		void clipBottom(Polygon &polygon, const DrawData &data);

-		void clipPlane(Polygon &polygon, const Plane &plane);

-

-		void clipEdge(float4 &Vo, const float4 &Vi, const float4 &Vj, float di, float dj) const;

-	};

-}

-

-#endif   // sw_Clipper_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_Clipper_hpp
+#define sw_Clipper_hpp
+
+#include "Plane.hpp"
+#include "Common/Types.hpp"
+
+namespace sw
+{
+	struct Polygon;
+	struct DrawCall;
+	struct DrawData;
+
+	class Clipper
+	{
+	public:
+		enum ClipFlags
+		{
+			CLIP_RIGHT  = 1 << 0,
+			CLIP_TOP    = 1 << 1,
+			CLIP_FAR    = 1 << 2,
+			CLIP_LEFT   = 1 << 3,
+			CLIP_BOTTOM = 1 << 4,
+			CLIP_NEAR   = 1 << 5,
+
+			CLIP_FINITE = 1 << 7,
+
+			// User-defined clipping planes
+			CLIP_PLANE0	= 1 << 8,
+			CLIP_PLANE1	= 1 << 9,
+			CLIP_PLANE2	= 1 << 10,
+			CLIP_PLANE3	= 1 << 11,
+			CLIP_PLANE4	= 1 << 12,
+			CLIP_PLANE5	= 1 << 13
+		};
+
+		Clipper();
+
+		~Clipper();
+
+		bool clip(Polygon &polygon, int clipFlagsOr, const DrawCall &draw);
+
+	private:
+		void clipNear(Polygon &polygon);
+		void clipFar(Polygon &polygon);
+		void clipLeft(Polygon &polygon, const DrawData &data);
+		void clipRight(Polygon &polygon, const DrawData &data);
+		void clipTop(Polygon &polygon, const DrawData &data);
+		void clipBottom(Polygon &polygon, const DrawData &data);
+		void clipPlane(Polygon &polygon, const Plane &plane);
+
+		void clipEdge(float4 &Vo, const float4 &Vi, const float4 &Vj, float di, float dj) const;
+	};
+}
+
+#endif   // sw_Clipper_hpp
diff --git a/src/Renderer/Color.hpp b/src/Renderer/Color.hpp
index febfb08..51e8015 100644
--- a/src/Renderer/Color.hpp
+++ b/src/Renderer/Color.hpp
@@ -38,28 +38,28 @@
 		T &operator[](int i);

 		const T &operator[](int i) const;

 

-		Color operator+() const;

-		Color operator-() const;

+		Color<T> operator+() const;

+		Color<T> operator-() const;

 

-		Color& operator=(const Color& c);

+		Color<T>& operator=(const Color<T>& c);

 

-		Color &operator+=(const Color &c);

-		Color &operator*=(float l);

+		Color<T> &operator+=(const Color<T> &c);

+		Color<T> &operator*=(float l);

 

-		static Color gradient(const Color &c1, const Color  &c2, float d);

-		static Color shade(const Color &c1, const Color  &c2, float d);

+		static Color<T> gradient(const Color<T> &c1, const Color<T>  &c2, float d);

+		static Color<T> shade(const Color<T> &c1, const Color<T>  &c2, float d);

 

-		template<class T>

-		friend Color operator+(const Color &c1, const Color &c2);

-		template<class T>

-		friend Color operator-(const Color &c1, const Color &c2);

+		template<class S>

+		friend Color<S> operator+(const Color<S> &c1, const Color<S> &c2);

+		template<class S>

+		friend Color<S> operator-(const Color<S> &c1, const Color<S> &c2);

 

-		template<class T>

-		friend Color operator*(float l, const Color &c);

-		template<class T>

-		friend Color operator*(const Color &c1, const Color &c2);

-		template<class T>

-		friend Color operator/(const Color &c, float l);

+		template<class S>

+		friend Color<S> operator*(float l, const Color<S> &c);

+		template<class S>

+		friend Color<S> operator*(const Color<S> &c1, const Color<S> &c2);

+		template<class S>

+		friend Color<S> operator/(const Color<S> &c, float l);

 

 		T r;

 		T g;

@@ -77,6 +77,7 @@
 	{

 	}

 

+	template<>

 	inline Color<byte>::Color(const Color<byte> &c)

 	{

 		r = c.r;

@@ -85,6 +86,7 @@
 		a = c.a;

 	}

 

+	template<>

 	inline Color<byte>::Color(const Color<short> &c)

 	{

 		r = clamp(c.r >> 4, 0, 255);

@@ -93,6 +95,7 @@
 		a = clamp(c.a >> 4, 0, 255);

 	}

 

+	template<>

 	inline Color<byte>::Color(const Color<float> &c)

 	{

 		r = ifloor(clamp(c.r * 256.0f, 0.0f, 255.0f));

@@ -101,6 +104,7 @@
 		a = ifloor(clamp(c.a * 256.0f, 0.0f, 255.0f));

 	}

 

+	template<>

 	inline Color<short>::Color(const Color<short> &c)

 	{

 		r = c.r;

@@ -109,6 +113,7 @@
 		a = c.a;

 	}

 

+	template<>

 	inline Color<short>::Color(const Color<byte> &c)

 	{

 		r = c.r << 4;

@@ -117,6 +122,7 @@
 		a = c.a << 4;

 	}

 

+	template<>

 	inline Color<float>::Color(const Color<float> &c)

 	{

 		r = c.r;

@@ -125,6 +131,7 @@
 		a = c.a;

 	}

 

+	template<>

 	inline Color<short>::Color(const Color<float> &c)

 	{

 		r = iround(clamp(c.r * 4095.0f, -4096.0f, 4095.0f));

@@ -133,6 +140,7 @@
 		a = iround(clamp(c.a * 4095.0f, -4096.0f, 4095.0f));

 	}

 

+	template<>

 	inline Color<float>::Color(const Color<byte> &c)

 	{

 		r = c.r / 255.0f;

@@ -141,6 +149,7 @@
 		a = c.a / 255.0f;

 	}

 

+	template<>

 	inline Color<float>::Color(const Color<short> &c)

 	{

 		r = c.r / 4095.0f;

@@ -149,6 +158,7 @@
 		a = c.a / 4095.0f;

 	}

 

+	template<>

 	inline Color<float>::Color(unsigned short c)

 	{

 		r = (float)(c & 0xF800) / (float)0xF800;

@@ -157,6 +167,7 @@
 		a = 1;

 	}

 

+	template<>

 	inline Color<short>::Color(unsigned short c)

 	{

 		// 4.12 fixed-point format

@@ -166,6 +177,7 @@
 		a = 0x1000;

 	}

 

+	template<>

 	inline Color<byte>::Color(unsigned short c)

 	{

 		r = (byte)(((c & 0xF800) >> 8) + ((c & 0xE000) >> 13));

@@ -174,6 +186,7 @@
 		a = 0xFF;

 	}

 

+	template<>

 	inline Color<float>::Color(int c)

 	{

 		const float d = 1.0f / 255.0f;

@@ -184,6 +197,7 @@
 		a = (float)((c & 0xFF000000) >> 24) * d;

 	}

 

+	template<>

 	inline Color<short>::Color(int c)

 	{

 		// 4.12 fixed-point format

@@ -193,6 +207,7 @@
 		a = (short)((c & 0xFF000000) >> 20);

 	}

 

+	template<>

 	inline Color<byte>::Color(int c)

 	{

 		r = (byte)((c & 0x00FF0000) >> 16);

@@ -201,6 +216,7 @@
 		a = (byte)((c & 0xFF000000) >> 24);

 	}

 

+	template<>

 	inline Color<float>::Color(unsigned int c)

 	{

 		const float d = 1.0f / 255.0f;

@@ -211,6 +227,7 @@
 		a = (float)((c & 0xFF000000) >> 24) * d;

 	}

 

+	template<>

 	inline Color<short>::Color(unsigned int c)

 	{

 		// 4.12 fixed-point format

@@ -220,6 +237,7 @@
 		a = (short)((c & 0xFF000000) >> 20);

 	}

 

+	template<>

 	inline Color<byte>::Color(unsigned int c)

 	{

 		r = (byte)((c & 0x00FF0000) >> 16);

@@ -228,6 +246,7 @@
 		a = (byte)((c & 0xFF000000) >> 24);

 	}

 

+	template<>

 	inline Color<float>::Color(unsigned long c)

 	{

 		const float d = 1.0f / 255.0f;

@@ -238,6 +257,7 @@
 		a = (float)((c & 0xFF000000) >> 24) * d;

 	}

 

+	template<>

 	inline Color<short>::Color(unsigned long c)

 	{

 		// 4.12 fixed-point format

@@ -247,6 +267,7 @@
 		a = (short)((c & 0xFF000000) >> 20);

 	}

 

+	template<>

 	inline Color<byte>::Color(unsigned long c)

 	{

 		r = (byte)((c & 0x00FF0000) >> 16);

@@ -264,6 +285,7 @@
 		a = a_;

 	}

 

+	template<>

 	inline Color<float>::operator unsigned int() const

 	{

 		return ((unsigned int)min(b * 255.0f, 255.0f) << 0) |

@@ -272,6 +294,7 @@
 		       ((unsigned int)min(a * 255.0f, 255.0f) << 24);

 	}

 

+	template<>

 	inline Color<short>::operator unsigned int() const

 	{

 		return ((unsigned int)min(b >> 4, 255) << 0) |

@@ -280,6 +303,7 @@
 		       ((unsigned int)min(a >> 4, 255) << 24);

 	}

 

+	template<>

 	inline Color<byte>::operator unsigned int() const

 	{

 		return (b << 0) +

@@ -382,6 +406,7 @@
 		return Color<T>(r, g, b, a);

 	}

 

+	template<>

 	inline Color<short> operator*(const Color<short> &c1, const Color<short> &c2)

 	{

 		short r = c1.r * c2.r >> 12;

@@ -392,6 +417,7 @@
 		return Color<short>(r, g, b, a);

 	}

 

+	template<>

 	inline Color<byte> operator*(const Color<byte> &c1, const Color<byte> &c2)

 	{

 		byte r = c1.r * c2.r >> 8;

diff --git a/src/Renderer/Context.cpp b/src/Renderer/Context.cpp
index 794a23a..fcc7f03 100644
--- a/src/Renderer/Context.cpp
+++ b/src/Renderer/Context.cpp
@@ -456,7 +456,7 @@
 			return false;
 		}
 
-		return isDrawPoint(true) && (input[PSize] || (!preTransformed && pointScaleActive()));
+		return isDrawPoint(true) && (input[PointSize] || (!preTransformed && pointScaleActive()));
 	}
 
 	Context::FogMode Context::pixelFogActive()
@@ -1402,6 +1402,16 @@
 		return vertexShader ? vertexShader->getVersion() : 0x0000;
 	}
 
+	int Context::getMultiSampleCount() const   // Multisample count of render target 0, or 1 when render target 0 is not set
+	{
+		return renderTarget[0] ? renderTarget[0]->getMultiSampleCount() : 1;
+	}
+
+	int Context::getSuperSampleCount() const   // Supersample count of render target 0, or 1 when render target 0 is not set
+	{
+		return renderTarget[0] ? renderTarget[0]->getSuperSampleCount() : 1;
+	}
+
 	Format Context::renderTargetInternalFormat(int index)
 	{
 		if(renderTarget[index])
diff --git a/src/Renderer/Context.hpp b/src/Renderer/Context.hpp
index 6c7c99a..571f7c0 100644
--- a/src/Renderer/Context.hpp
+++ b/src/Renderer/Context.hpp
@@ -35,7 +35,7 @@
 		BlendWeight = 1,

 		BlendIndices = 2,

 		Normal = 3,

-		PSize = 4,

+		PointSize = 4,

 		Color0 = 5,

 		Color1 = 6,

 		TexCoord0 = 7,

@@ -361,6 +361,9 @@
 		unsigned short pixelShaderVersion() const;

 		unsigned short vertexShaderVersion() const;

 

+		int getMultiSampleCount() const;

+		int getSuperSampleCount() const;

+

 		DrawType drawType;

 

 		bool stencilEnable;

diff --git a/src/Renderer/Matrix.hpp b/src/Renderer/Matrix.hpp
index 1b9a39e..d3c9377 100644
--- a/src/Renderer/Matrix.hpp
+++ b/src/Renderer/Matrix.hpp
@@ -1,211 +1,211 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef Matrix_hpp

-#define Matrix_hpp

-

-namespace sw

-{

-	struct Vector;

-	struct Point;

-

-	struct Matrix

-	{

-		Matrix();

-		Matrix(const int i);

-		Matrix(const float m[16]);

-		Matrix(const float m[4][4]);

-		Matrix(float m11, float m12, float m13,

-		       float m21, float m22, float m23,

-		       float m31, float m32, float m33);

-		Matrix(float m11, float m12, float m13, float m14,

-		       float m21, float m22, float m23, float m24,

-		       float m31, float m32, float m33, float m34,

-		       float m41, float m42, float m43, float m44);

-		Matrix(const Vector &v1, const Vector &v2, const Vector &v3);   // Column vectors

-

-		Matrix &operator=(const Matrix &N);

-

-		// Row major order

-		float m[4][4];

-

-		static Matrix diag(float m11, float m22, float m33, float m44);

-

-		operator float*();

-

-		Matrix operator+() const;

-		Matrix operator-() const;

-

-		Matrix operator!() const;   // Inverse

-		Matrix operator~() const;   // Transpose

-

-		Matrix &operator+=(const Matrix &N);

-		Matrix &operator-=(const Matrix &N);

-		Matrix &operator*=(float s);

-		Matrix &operator*=(const Matrix &N);

-		Matrix &operator/=(float s);

-

-		float *operator[](int i);   // Access element [row][col], starting with [0][0]

-		const float *operator[](int i) const;

-

-		float &operator()(int i, int j);   // Access element (row, col), starting with (1, 1)

-		const float &operator()(int i, int j) const;

-

-		friend bool operator==(const Matrix &M, const Matrix &N);

-		friend bool operator!=(const Matrix &M, const Matrix &N);

-

-		friend Matrix operator+(const Matrix &M, const Matrix &N);

-		friend Matrix operator-(const Matrix &M, const Matrix &N);

-		friend Matrix operator*(float s, const Matrix &M);

-		friend Matrix operator*(const Matrix &M, const Matrix &N);

-		friend Matrix operator/(const Matrix &M, float s);

-

-		static float det(const Matrix &M);

-		static float det(float m11);

-		static float det(float m11, float m12,

-		                 float m21, float m22);

-		static float det(float m11, float m12, float m13,

-		                 float m21, float m22, float m23,

-		                 float m31, float m32, float m33);

-		static float det(float m11, float m12, float m13, float m14,

-		                 float m21, float m22, float m23, float m24,

-		                 float m31, float m32, float m33, float m34,

-		                 float m41, float m42, float m43, float m44);

-		static float det(const Vector &v1, const Vector &v2, const Vector &v3);

-		static float det3(const Matrix &M);

-

-		static float tr(const Matrix &M);

-

-		Matrix &orthogonalise();   // Gram-Schmidt orthogonalisation of 3x3 submatrix

-

-		static Matrix eulerRotate(const Vector &v);

-		static Matrix eulerRotate(float x, float y, float z);

-		

-		static Matrix translate(const Vector &v);

-		static Matrix translate(float x, float y, float z);

-		

-		static Matrix scale(const Vector &v);

-		static Matrix scale(float x, float y, float z);

-

-		static Matrix lookAt(const Vector &v);

-		static Matrix lookAt(float x, float y, float z);

-	};

-}

-

-#include "Vector.hpp"

-

-namespace sw

-{

-	inline Matrix::Matrix()

-	{

-	}

-

-	inline Matrix::Matrix(const int i)

-	{

-		const float s = (float)i;

-

-		Matrix &M = *this;

-

-		M(1, 1) = s; M(1, 2) = 0; M(1, 3) = 0; M(1, 4) = 0;

-		M(2, 1) = 0; M(2, 2) = s; M(2, 3) = 0; M(2, 4) = 0;

-		M(3, 1) = 0; M(3, 2) = 0; M(3, 3) = s; M(3, 4) = 0;

-		M(4, 1) = 0; M(4, 2) = 0; M(4, 3) = 0; M(4, 4) = s;

-	}

-

-	inline Matrix::Matrix(const float m[16])

-	{

-		Matrix &M = *this;

-

-		M(1, 1) = m[0];  M(1, 2) = m[1];  M(1, 3) = m[2];  M(1, 4) = m[3];

-		M(2, 1) = m[4];  M(2, 2) = m[5];  M(2, 3) = m[6];  M(2, 4) = m[7];

-		M(3, 1) = m[8];  M(3, 2) = m[8];  M(3, 3) = m[10]; M(3, 4) = m[11];

-		M(4, 1) = m[12]; M(4, 2) = m[13]; M(4, 3) = m[14]; M(4, 4) = m[15];

-	}

-

-	inline Matrix::Matrix(const float m[4][4])

-	{

-		Matrix &M = *this;

-

-		M[0][0] = m[0][0];  M[0][1] = m[0][1];  M[0][2] = m[0][2];  M[0][3] = m[0][3];

-		M[1][0] = m[1][0];  M[1][1] = m[1][1];  M[1][2] = m[1][2];  M[1][3] = m[1][3];

-		M[2][0] = m[2][0];  M[2][1] = m[2][1];  M[2][2] = m[2][2];  M[2][3] = m[2][3];

-		M[3][0] = m[3][0];  M[3][1] = m[3][1];  M[3][2] = m[3][2];  M[3][3] = m[3][3];

-	}

-

-	inline Matrix::Matrix(float m11, float m12, float m13, 

-	                      float m21, float m22, float m23, 

-	                      float m31, float m32, float m33)

-	{

-		Matrix &M = *this;

-

-		M(1, 1) = m11; M(1, 2) = m12; M(1, 3) = m13; M(1, 4) = 0;

-		M(2, 1) = m21; M(2, 2) = m22; M(2, 3) = m23; M(2, 4) = 0;

-		M(3, 1) = m31; M(3, 2) = m32; M(3, 3) = m33; M(3, 4) = 0;

-		M(4, 1) = 0;   M(4, 2) = 0;   M(4, 3) = 0;   M(4, 4) = 1;

-	}

-

-	inline Matrix::Matrix(float m11, float m12, float m13, float m14, 

-	                      float m21, float m22, float m23, float m24, 

-	                      float m31, float m32, float m33, float m34, 

-	                      float m41, float m42, float m43, float m44)

-	{

-		Matrix &M = *this;

-

-		M(1, 1) = m11; M(1, 2) = m12; M(1, 3) = m13; M(1, 4) = m14;

-		M(2, 1) = m21; M(2, 2) = m22; M(2, 3) = m23; M(2, 4) = m24;

-		M(3, 1) = m31; M(3, 2) = m32; M(3, 3) = m33; M(3, 4) = m34;

-		M(4, 1) = m41; M(4, 2) = m42; M(4, 3) = m43; M(4, 4) = m44;

-	}

-

-	inline Matrix::Matrix(const Vector &v1, const Vector &v2, const Vector &v3)

-	{

-		Matrix &M = *this;

-

-		M(1, 1) = v1.x; M(1, 2) = v2.x; M(1, 3) = v3.x; M(1, 4) = 0;

-		M(2, 1) = v1.y; M(2, 2) = v2.y; M(2, 3) = v3.y; M(2, 4) = 0;

-		M(3, 1) = v1.z; M(3, 2) = v2.z; M(3, 3) = v3.z; M(3, 4) = 0;

-		M(4, 1) = 0;    M(4, 2) = 0;    M(4, 3) = 0;    M(4, 4) = 1;

-	}

-

-	inline Matrix &Matrix::operator=(const Matrix &N)

-	{

-		Matrix &M = *this;

-

-		M(1, 1) = N(1, 1); M(1, 2) = N(1, 2); M(1, 3) = N(1, 3); M(1, 4) = N(1, 4);

-		M(2, 1) = N(2, 1); M(2, 2) = N(2, 2); M(2, 3) = N(2, 3); M(2, 4) = N(2, 4);

-		M(3, 1) = N(3, 1); M(3, 2) = N(3, 2); M(3, 3) = N(3, 3); M(3, 4) = N(3, 4);

-		M(4, 1) = N(4, 1); M(4, 2) = N(4, 2); M(4, 3) = N(4, 3); M(4, 4) = N(4, 4);

-

-		return M;

-	}

-

-	inline float *Matrix::operator[](int i)

-	{

-		return m[i];

-	}

-

-	inline const float *Matrix::operator[](int i) const

-	{

-		return m[i];

-	}

-

-	inline float &Matrix::operator()(int i, int j)

-	{

-		return m[i - 1][j - 1];

-	}

-

-	inline const float &Matrix::operator()(int i, int j) const

-	{

-		return m[i - 1][j - 1];

-	}

-}

-

-#endif   // Matrix_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef Matrix_hpp
+#define Matrix_hpp
+
+namespace sw
+{
+	struct Vector;
+	struct Point;
+
+	struct Matrix
+	{
+		Matrix();
+		Matrix(const int i);
+		Matrix(const float m[16]);
+		Matrix(const float m[4][4]);
+		Matrix(float m11, float m12, float m13,
+		       float m21, float m22, float m23,
+		       float m31, float m32, float m33);
+		Matrix(float m11, float m12, float m13, float m14,
+		       float m21, float m22, float m23, float m24,
+		       float m31, float m32, float m33, float m34,
+		       float m41, float m42, float m43, float m44);
+		Matrix(const Vector &v1, const Vector &v2, const Vector &v3);   // Column vectors
+
+		Matrix &operator=(const Matrix &N);
+
+		// Row major order
+		float m[4][4];
+
+		static Matrix diag(float m11, float m22, float m33, float m44);
+
+		operator float*();
+
+		Matrix operator+() const;
+		Matrix operator-() const;
+
+		Matrix operator!() const;   // Inverse
+		Matrix operator~() const;   // Transpose
+
+		Matrix &operator+=(const Matrix &N);
+		Matrix &operator-=(const Matrix &N);
+		Matrix &operator*=(float s);
+		Matrix &operator*=(const Matrix &N);
+		Matrix &operator/=(float s);
+
+		float *operator[](int i);   // Access element [row][col], starting with [0][0]
+		const float *operator[](int i) const;
+
+		float &operator()(int i, int j);   // Access element (row, col), starting with (1, 1)
+		const float &operator()(int i, int j) const;
+
+		friend bool operator==(const Matrix &M, const Matrix &N);
+		friend bool operator!=(const Matrix &M, const Matrix &N);
+
+		friend Matrix operator+(const Matrix &M, const Matrix &N);
+		friend Matrix operator-(const Matrix &M, const Matrix &N);
+		friend Matrix operator*(float s, const Matrix &M);
+		friend Matrix operator*(const Matrix &M, const Matrix &N);
+		friend Matrix operator/(const Matrix &M, float s);
+
+		static float det(const Matrix &M);
+		static float det(float m11);
+		static float det(float m11, float m12,
+		                 float m21, float m22);
+		static float det(float m11, float m12, float m13,
+		                 float m21, float m22, float m23,
+		                 float m31, float m32, float m33);
+		static float det(float m11, float m12, float m13, float m14,
+		                 float m21, float m22, float m23, float m24,
+		                 float m31, float m32, float m33, float m34,
+		                 float m41, float m42, float m43, float m44);
+		static float det(const Vector &v1, const Vector &v2, const Vector &v3);
+		static float det3(const Matrix &M);
+
+		static float tr(const Matrix &M);
+
+		Matrix &orthogonalise();   // Gram-Schmidt orthogonalisation of 3x3 submatrix
+
+		static Matrix eulerRotate(const Vector &v);
+		static Matrix eulerRotate(float x, float y, float z);
+		
+		static Matrix translate(const Vector &v);
+		static Matrix translate(float x, float y, float z);
+		
+		static Matrix scale(const Vector &v);
+		static Matrix scale(float x, float y, float z);
+
+		static Matrix lookAt(const Vector &v);
+		static Matrix lookAt(float x, float y, float z);
+	};
+}
+
+#include "Vector.hpp"
+
+namespace sw
+{
+	inline Matrix::Matrix()
+	{
+	}
+
+	inline Matrix::Matrix(const int i)
+	{
+		const float s = (float)i;
+
+		Matrix &M = *this;
+
+		M(1, 1) = s; M(1, 2) = 0; M(1, 3) = 0; M(1, 4) = 0;
+		M(2, 1) = 0; M(2, 2) = s; M(2, 3) = 0; M(2, 4) = 0;
+		M(3, 1) = 0; M(3, 2) = 0; M(3, 3) = s; M(3, 4) = 0;
+		M(4, 1) = 0; M(4, 2) = 0; M(4, 3) = 0; M(4, 4) = s;
+	}
+
+	inline Matrix::Matrix(const float m[16])   // Build from a flat array of 16 floats in row-major order
+	{
+		Matrix &M = *this;
+
+		M(1, 1) = m[0];  M(1, 2) = m[1];  M(1, 3) = m[2];  M(1, 4) = m[3];
+		M(2, 1) = m[4];  M(2, 2) = m[5];  M(2, 3) = m[6];  M(2, 4) = m[7];
+		M(3, 1) = m[8];  M(3, 2) = m[9];  M(3, 3) = m[10]; M(3, 4) = m[11];
+		M(4, 1) = m[12]; M(4, 2) = m[13]; M(4, 3) = m[14]; M(4, 4) = m[15];
+	}
+
+	inline Matrix::Matrix(const float m[4][4])
+	{
+		Matrix &M = *this;
+
+		M[0][0] = m[0][0];  M[0][1] = m[0][1];  M[0][2] = m[0][2];  M[0][3] = m[0][3];
+		M[1][0] = m[1][0];  M[1][1] = m[1][1];  M[1][2] = m[1][2];  M[1][3] = m[1][3];
+		M[2][0] = m[2][0];  M[2][1] = m[2][1];  M[2][2] = m[2][2];  M[2][3] = m[2][3];
+		M[3][0] = m[3][0];  M[3][1] = m[3][1];  M[3][2] = m[3][2];  M[3][3] = m[3][3];
+	}
+
+	inline Matrix::Matrix(float m11, float m12, float m13, 
+	                      float m21, float m22, float m23, 
+	                      float m31, float m32, float m33)
+	{
+		Matrix &M = *this;
+
+		M(1, 1) = m11; M(1, 2) = m12; M(1, 3) = m13; M(1, 4) = 0;
+		M(2, 1) = m21; M(2, 2) = m22; M(2, 3) = m23; M(2, 4) = 0;
+		M(3, 1) = m31; M(3, 2) = m32; M(3, 3) = m33; M(3, 4) = 0;
+		M(4, 1) = 0;   M(4, 2) = 0;   M(4, 3) = 0;   M(4, 4) = 1;
+	}
+
+	inline Matrix::Matrix(float m11, float m12, float m13, float m14, 
+	                      float m21, float m22, float m23, float m24, 
+	                      float m31, float m32, float m33, float m34, 
+	                      float m41, float m42, float m43, float m44)
+	{
+		Matrix &M = *this;
+
+		M(1, 1) = m11; M(1, 2) = m12; M(1, 3) = m13; M(1, 4) = m14;
+		M(2, 1) = m21; M(2, 2) = m22; M(2, 3) = m23; M(2, 4) = m24;
+		M(3, 1) = m31; M(3, 2) = m32; M(3, 3) = m33; M(3, 4) = m34;
+		M(4, 1) = m41; M(4, 2) = m42; M(4, 3) = m43; M(4, 4) = m44;
+	}
+
+	inline Matrix::Matrix(const Vector &v1, const Vector &v2, const Vector &v3)
+	{
+		Matrix &M = *this;
+
+		M(1, 1) = v1.x; M(1, 2) = v2.x; M(1, 3) = v3.x; M(1, 4) = 0;
+		M(2, 1) = v1.y; M(2, 2) = v2.y; M(2, 3) = v3.y; M(2, 4) = 0;
+		M(3, 1) = v1.z; M(3, 2) = v2.z; M(3, 3) = v3.z; M(3, 4) = 0;
+		M(4, 1) = 0;    M(4, 2) = 0;    M(4, 3) = 0;    M(4, 4) = 1;
+	}
+
+	inline Matrix &Matrix::operator=(const Matrix &N)
+	{
+		Matrix &M = *this;
+
+		M(1, 1) = N(1, 1); M(1, 2) = N(1, 2); M(1, 3) = N(1, 3); M(1, 4) = N(1, 4);
+		M(2, 1) = N(2, 1); M(2, 2) = N(2, 2); M(2, 3) = N(2, 3); M(2, 4) = N(2, 4);
+		M(3, 1) = N(3, 1); M(3, 2) = N(3, 2); M(3, 3) = N(3, 3); M(3, 4) = N(3, 4);
+		M(4, 1) = N(4, 1); M(4, 2) = N(4, 2); M(4, 3) = N(4, 3); M(4, 4) = N(4, 4);
+
+		return M;
+	}
+
+	inline float *Matrix::operator[](int i)
+	{
+		return m[i];
+	}
+
+	inline const float *Matrix::operator[](int i) const
+	{
+		return m[i];
+	}
+
+	inline float &Matrix::operator()(int i, int j)
+	{
+		return m[i - 1][j - 1];
+	}
+
+	inline const float &Matrix::operator()(int i, int j) const
+	{
+		return m[i - 1][j - 1];
+	}
+}
+
+#endif   // Matrix_hpp
diff --git a/src/Renderer/PixelProcessor.cpp b/src/Renderer/PixelProcessor.cpp
index bd6ea30..0fbf0ea 100644
--- a/src/Renderer/PixelProcessor.cpp
+++ b/src/Renderer/PixelProcessor.cpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer
 //
-// Copyright(c) 2005-2012 TransGaming Inc.
+// Copyright(c) 2005-2013 TransGaming Inc.
 //
 // All rights reserved. No part of this software may be copied, distributed, transmitted,
 // transcribed, stored in a retrieval system, translated into any human or computer
@@ -19,12 +19,16 @@
 #include "Constants.hpp"
 #include "Debug.hpp"
 
+#include <string.h>
+
 namespace sw
 {
 	extern bool complementaryDepthBuffer;
 	extern Context::TransparencyAntialiasing transparencyAntialiasing;
 	extern bool perspectiveCorrection;
 
+	bool precachePixel = false;
+
 	unsigned int PixelProcessor::States::computeHash()
 	{
 		unsigned int *state = (unsigned int*)this;
@@ -59,7 +63,6 @@
 		                             // Round to nearest LOD [0.7, 1.4]:  0.0
 		                             // Round to lowest LOD  [1.0, 2.0]:  0.5
 
-		precacheDLL = 0;
 		routineCache = 0;
 		setRoutineCacheSize(1024);
 	}
@@ -788,7 +791,7 @@
 	void PixelProcessor::setRoutineCacheSize(int cacheSize)
 	{
 		delete routineCache;
-		routineCache = new LRUCache<State, Routine>(clamp(cacheSize, 1, 65536));
+		routineCache = new RoutineCache<State>(clamp(cacheSize, 1, 65536), precachePixel ? "sw-pixel" : 0);
 	}
 
 	void PixelProcessor::setFogRanges(float start, float end)
@@ -828,7 +831,7 @@
 		{
 			state.alphaCompareMode = context->alphaCompareMode;
 
-			state.transparencyAntialiasing = context->renderTarget[0]->getMultiSampleCount() > 1 ? transparencyAntialiasing : Context::TRANSPARENCY_NONE;
+			state.transparencyAntialiasing = context->getMultiSampleCount() > 1 ? transparencyAntialiasing : Context::TRANSPARENCY_NONE;
 		}
 
 		state.depthWriteEnable = context->depthWriteActive();
@@ -859,8 +862,8 @@
 			state.depthTestActive = true;
 			state.depthCompareMode = context->depthCompareMode;
 			state.quadLayoutDepthBuffer = context->depthStencil->getInternalFormat() != FORMAT_D32F_LOCKABLE &&
-			                              context->depthStencil->getInternalFormat() != FORMAT_D32F_TEXTURE &&
-			                              context->depthStencil->getInternalFormat() != FORMAT_D32F_SHADOW;
+			                              context->depthStencil->getInternalFormat() != FORMAT_D32FS8_TEXTURE &&
+			                              context->depthStencil->getInternalFormat() != FORMAT_D32FS8_SHADOW;
 		}
 
 		state.occlusionEnabled = context->occlusionEnabled;
@@ -891,8 +894,8 @@
 			state.targetFormat[i] = context->renderTargetInternalFormat(i);
 		}
 
-		state.writeSRGB	= context->writeSRGB && Surface::isSRGBwritable(context->renderTarget[0]->getExternalFormat());
-		state.multiSample = context->renderTarget[0]->getMultiSampleCount();
+		state.writeSRGB	= context->writeSRGB && context->renderTarget[0] && Surface::isSRGBwritable(context->renderTarget[0]->getExternalFormat());
+		state.multiSample = context->getMultiSampleCount();
 		state.multiSampleMask = context->multiSampleMask;
 
 		if(state.multiSample > 1 && context->pixelShader)
diff --git a/src/Renderer/PixelProcessor.hpp b/src/Renderer/PixelProcessor.hpp
index 0c0122a..df1876d 100644
--- a/src/Renderer/PixelProcessor.hpp
+++ b/src/Renderer/PixelProcessor.hpp
@@ -13,14 +13,13 @@
 #define sw_PixelProcessor_hpp

 

 #include "Context.hpp"

-#include "LRUCache.hpp"

+#include "RoutineCache.hpp"

 

 namespace sw

 {

 	class PixelShader;

 	class Rasterizer;

 	struct Texture;

-	class Routine;

 	struct DrawData;

 

 	class PixelProcessor

@@ -179,7 +178,7 @@
 		};

 

 	public:

-		typedef void (__cdecl *RoutinePointer)(const Primitive *primitive, int count, int thread, DrawData *draw);

+		typedef void (*RoutinePointer)(const Primitive *primitive, int count, int thread, DrawData *draw);

 

 		PixelProcessor(Context *context);

 

@@ -302,8 +301,7 @@
 

 		Context *const context;

 

-		LRUCache<State, Routine> *routineCache;

-		HMODULE precacheDLL;

+		RoutineCache<State> *routineCache;

 	};

 }

 

diff --git a/src/Renderer/Plane.cpp b/src/Renderer/Plane.cpp
index 8c0fbe4..ab9ea77 100644
--- a/src/Renderer/Plane.cpp
+++ b/src/Renderer/Plane.cpp
@@ -12,8 +12,6 @@
 #include "Plane.hpp"
 
 #include "Matrix.hpp"
-#include "Point.hpp"
-#include "Math.hpp"
 
 namespace sw
 {
@@ -21,30 +19,6 @@
 	{
 	}
 
-	Plane::Plane(const Plane &p)
-	{
-		n = p.n;
-		D = p.D;
-	}
-
-	Plane::Plane(const Vector &p_n, float p_D)
-	{
-		n = p_n;
-		D = p_D;
-	}
-
-	Plane::Plane(const Vector &p_n, const Point &P)
-	{
-		n = p_n;
-		D = -(n * P);
-	}
-
-	Plane::Plane(const Point &P0, const Point &P1, const Point &P2)
-	{
-		n = (P1 - P0) % (P2 - P0);
-		D = -(n * P0);
-	}
-
 	Plane::Plane(float p_A, float p_B, float p_C, float p_D)
 	{
 		A = p_A;
@@ -61,29 +35,6 @@
 		D = ABCD[3];
 	}
 
-	Plane Plane::operator+() const
-	{
-		return *this;
-	}
-
-	Plane Plane::operator-() const
-	{
-		return Plane(-n, -D);
-	}
-
-	Plane &Plane::operator=(const Plane &p)
-	{
-		n = p.n;
-		D = p.D;
-
-		return *this;
-	}
-
-	Plane &Plane::operator*=(const Matrix &M)
-	{
-		return *this = *this * M;
-	}
-
 	Plane operator*(const Plane &p, const Matrix &T)
 	{
 		Matrix M = !T;
@@ -103,34 +54,4 @@
 		             M(1, 3) * p.A + M(2, 3) * p.B + M(3, 3) * p.C + M(4, 3) * p.D,
 		             M(1, 4) * p.A + M(2, 4) * p.B + M(3, 4) * p.C + M(4, 4) * p.D);
 	}
-
-	float operator^(const Plane &p1, const Plane &p2)
-	{
-		return acos(p1.n * p2.n / (Vector::N(p1.n) * Vector::N(p2.n)));
-	}
-
-	float Plane::d(const Point &P) const
-	{
-		return P * n + D;
-	}
-
-	float Plane::d(const Point &P, const Plane &p)
-	{
-		return P * p.n + p.D;
-	}
-
-	float Plane::d(const Plane &p, const Point &P)
-	{
-		return p.n * P + p.D;
-	}
-
-	Plane &Plane::normalise()
-	{
-		float l = Vector::N(n);
-
-		n /= l;
-		D /= l;
-
-		return *this;
-	}
 }
diff --git a/src/Renderer/Plane.hpp b/src/Renderer/Plane.hpp
index 0c0ef32..f831339 100644
--- a/src/Renderer/Plane.hpp
+++ b/src/Renderer/Plane.hpp
@@ -17,52 +17,20 @@
 namespace sw

 {

 	struct Matrix;

-	struct Point;

 

 	struct Plane

 	{

-		union

-		{

-			struct

-			{

-				float A;

-				float B;

-				float C;

-			};

-			struct

-			{

-				Vector n;

-			};

-		};

-

-		float D;   // Distance to origin along normal

+		float A;

+		float B;

+		float C;

+		float D;

 

 		Plane();

-		Plane(const Plane &p);

-		Plane(const Vector &n, float D);   // Normal and distance to origin

-		Plane(const Vector &n, const Point &P);   // Normal and point on plane

-		Plane(const Point &P0, const Point &P1, const Point &P2);   // Through three points

 		Plane(float A, float B, float C, float D);   // Plane equation 

 		Plane(const float ABCD[4]);

 

-		Plane &operator=(const Plane &p);

-

-		Plane operator+() const;

-		Plane operator-() const;   // Flip normal

-

-		Plane &operator*=(const Matrix &A);   // Transform plane by matrix (post-multiply)

-

 		friend Plane operator*(const Plane &p, const Matrix &A);   // Transform plane by matrix (post-multiply)

 		friend Plane operator*(const Matrix &A, const Plane &p);   // Transform plane by matrix (pre-multiply)

-

-		friend float operator^(const Plane &p1, const Plane &p2);   // Angle between planes

-

-		float d(const Point &P) const;   // Oriented distance between point and plane

-

-		static float d(const Point &P, const Plane &p);   // Oriented distance between point and plane

-		static float d(const Plane &p, const Point &P);   // Oriented distance between plane and point

-

-		Plane &normalise();   // Normalise the Plane equation

 	};

 }

 

diff --git a/src/Renderer/Point.hpp b/src/Renderer/Point.hpp
index 1448ee0..7b50f58 100644
--- a/src/Renderer/Point.hpp
+++ b/src/Renderer/Point.hpp
@@ -1,136 +1,136 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef Point_hpp

-#define Point_hpp

-

-namespace sw

-{

-	struct Vector;

-	struct Matrix;

-

-	struct Point

-	{

-		Point();

-		Point(const int i);

-		Point(const Point &P);

-		Point(const Vector &v);

-		Point(float Px, float Py, float Pz);

-

-		Point &operator=(const Point &P);

-

-		union

-		{

-			float p[3];

-

-			struct

-			{		

-				float x;

-				float y;

-				float z;

-			};

-		};

-

-		float &operator[](int i);

-		float &operator()(int i);

-

-		const float &operator[](int i) const;

-		const float &operator()(int i) const;

-

-		Point &operator+=(const Vector &v);

-		Point &operator-=(const Vector &v);

-

-		friend Point operator+(const Point &P, const Vector &v);

-		friend Point operator-(const Point &P, const Vector &v);

-

-		friend Vector operator-(const Point &P, const Point &Q);

-

-		friend Point operator*(const Matrix &M, const Point& P);

-		friend Point operator*(const Point &P, const Matrix &M);

-		friend Point &operator*=(Point &P, const Matrix &M);

-

-		float d(const Point &P) const;   // Distance between two points

-		float d2(const Point &P) const;   // Squared distance between two points

-

-		static float d(const Point &P, const Point &Q);   // Distance between two points

-		static float d2(const Point &P, const Point &Q);   // Squared distance between two points

-	};

-}

-

-#include "Vector.hpp"

-

-namespace sw

-{

-	inline Point::Point()

-	{

-	}

-

-	inline Point::Point(const int i)

-	{

-		const float s = (float)i;

-

-		x = s;

-		y = s;

-		z = s;

-	}

-

-	inline Point::Point(const Point &P)

-	{

-		x = P.x;

-		y = P.y;

-		z = P.z;

-	}

-

-	inline Point::Point(const Vector &v)

-	{

-		x = v.x;

-		y = v.y;

-		z = v.z;

-	}

-

-	inline Point::Point(float P_x, float P_y, float P_z)

-	{

-		x = P_x;

-		y = P_y;

-		z = P_z;

-	}

-

-	inline Point &Point::operator=(const Point &P)

-	{

-		x = P.x;

-		y = P.y;

-		z = P.z;

-

-		return *this;

-	}

-

-	inline float &Point::operator()(int i)

-	{

-		return p[i];

-	}

-

-	inline float &Point::operator[](int i)

-	{

-		return p[i];

-	}

-

-	inline const float &Point::operator()(int i) const

-	{

-		return p[i];

-	}

-

-	inline const float &Point::operator[](int i) const

-	{

-		return p[i];

-	}

-}

-

-#endif   // Point_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef Point_hpp
+#define Point_hpp
+
+namespace sw
+{
+	struct Vector;
+	struct Matrix;
+
+	struct Point
+	{
+		Point();
+		Point(const int i);
+		Point(const Point &P);
+		Point(const Vector &v);
+		Point(float Px, float Py, float Pz);
+
+		Point &operator=(const Point &P);
+
+		union
+		{
+			float p[3];
+
+			struct
+			{		
+				float x;
+				float y;
+				float z;
+			};
+		};
+
+		float &operator[](int i);
+		float &operator()(int i);
+
+		const float &operator[](int i) const;
+		const float &operator()(int i) const;
+
+		Point &operator+=(const Vector &v);
+		Point &operator-=(const Vector &v);
+
+		friend Point operator+(const Point &P, const Vector &v);
+		friend Point operator-(const Point &P, const Vector &v);
+
+		friend Vector operator-(const Point &P, const Point &Q);
+
+		friend Point operator*(const Matrix &M, const Point& P);
+		friend Point operator*(const Point &P, const Matrix &M);
+		friend Point &operator*=(Point &P, const Matrix &M);
+
+		float d(const Point &P) const;   // Distance between two points
+		float d2(const Point &P) const;   // Squared distance between two points
+
+		static float d(const Point &P, const Point &Q);   // Distance between two points
+		static float d2(const Point &P, const Point &Q);   // Squared distance between two points
+	};
+}
+
+#include "Vector.hpp"
+
+namespace sw
+{
+	inline Point::Point()
+	{
+	}
+
+	inline Point::Point(const int i)
+	{
+		const float s = (float)i;
+
+		x = s;
+		y = s;
+		z = s;
+	}
+
+	inline Point::Point(const Point &P)
+	{
+		x = P.x;
+		y = P.y;
+		z = P.z;
+	}
+
+	inline Point::Point(const Vector &v)
+	{
+		x = v.x;
+		y = v.y;
+		z = v.z;
+	}
+
+	inline Point::Point(float P_x, float P_y, float P_z)
+	{
+		x = P_x;
+		y = P_y;
+		z = P_z;
+	}
+
+	inline Point &Point::operator=(const Point &P)
+	{
+		x = P.x;
+		y = P.y;
+		z = P.z;
+
+		return *this;
+	}
+
+	inline float &Point::operator()(int i)
+	{
+		return p[i];
+	}
+
+	inline float &Point::operator[](int i)
+	{
+		return p[i];
+	}
+
+	inline const float &Point::operator()(int i) const
+	{
+		return p[i];
+	}
+
+	inline const float &Point::operator[](int i) const
+	{
+		return p[i];
+	}
+}
+
+#endif   // Point_hpp
diff --git a/src/Renderer/QuadRasterizer.cpp b/src/Renderer/QuadRasterizer.cpp
index d4548fe..a11cf18 100644
--- a/src/Renderer/QuadRasterizer.cpp
+++ b/src/Renderer/QuadRasterizer.cpp
@@ -34,7 +34,7 @@
 
 	void QuadRasterizer::generate()
 	{
-		Function<Void, Pointer<Byte>, Int, Int, Pointer<Byte>> function;
+		Function<Void, Pointer<Byte>, Int, Int, Pointer<Byte> > function;
 		{
 			#if PERF_PROFILE
 				Long pixelTime = Ticks();
@@ -46,7 +46,7 @@
 			Pointer<Byte> data(function.arg(3));
 
 			Registers r(shader);
-			r.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
+			r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
 			r.cluster = cluster;
 			r.data = data;
 			
@@ -104,18 +104,18 @@
 		{
 			if(state.colorWriteActive(index))
 			{
-				cBuffer[index] = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
+				cBuffer[index] = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
 			}
 		}
 
 		if(state.depthTestActive)
 		{
-			zBuffer = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
+			zBuffer = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
 		}
 
 		if(state.stencilActive)
 		{
-			sBuffer = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB));
+			sBuffer = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB));
 		}
 
 		Int y = yMin;
diff --git a/src/Renderer/QuadRasterizer.hpp b/src/Renderer/QuadRasterizer.hpp
index 4edfeac..36184cc 100644
--- a/src/Renderer/QuadRasterizer.hpp
+++ b/src/Renderer/QuadRasterizer.hpp
@@ -1,34 +1,34 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_QuadRasterizer_hpp

-#define sw_QuadRasterizer_hpp

-

-#include "Rasterizer.hpp"

-#include "PixelRoutine.hpp"

-

-namespace sw

-{

-	class QuadRasterizer : public PixelRoutine

-	{

-	public:

-		QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader);

-

-		virtual ~QuadRasterizer();

-

-	private:

-		void generate();

-

-		void rasterize(Registers &r, Int &yMin, Int &yMax);

-	};

-}

-

-#endif   // sw_QuadRasterizer_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_QuadRasterizer_hpp
+#define sw_QuadRasterizer_hpp
+
+#include "Rasterizer.hpp"
+#include "PixelRoutine.hpp"
+
+namespace sw
+{
+	class QuadRasterizer : public PixelRoutine
+	{
+	public:
+		QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader);
+
+		virtual ~QuadRasterizer();
+
+	private:
+		void generate();
+
+		void rasterize(Registers &r, Int &yMin, Int &yMax);
+	};
+}
+
+#endif   // sw_QuadRasterizer_hpp
diff --git a/src/Renderer/Rasterizer.hpp b/src/Renderer/Rasterizer.hpp
index 061b399..0037379 100644
--- a/src/Renderer/Rasterizer.hpp
+++ b/src/Renderer/Rasterizer.hpp
@@ -1,39 +1,39 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_Rasterizer_hpp

-#define sw_Rasterizer_hpp

-

-#include "Context.hpp"

-

-#include "PixelProcessor.hpp"

-#include "Config.hpp"

-

-namespace sw

-{

-	class Rasterizer

-	{

-	public:

-		Rasterizer(const PixelProcessor::State &state);

-

-		virtual ~Rasterizer();

-

-		virtual void generate() = 0;

-		Routine *getRoutine();

-

-	protected:

-		Routine *routine;

-

-		const PixelProcessor::State &state;

-	};

-}

-

-#endif   // sw_Rasterizer_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_Rasterizer_hpp
+#define sw_Rasterizer_hpp
+
+#include "Context.hpp"
+
+#include "PixelProcessor.hpp"
+#include "Config.hpp"
+
+namespace sw
+{
+	class Rasterizer
+	{
+	public:
+		Rasterizer(const PixelProcessor::State &state);
+
+		virtual ~Rasterizer();
+
+		virtual void generate() = 0;
+		Routine *getRoutine();
+
+	protected:
+		Routine *routine;
+
+		const PixelProcessor::State &state;
+	};
+}
+
+#endif   // sw_Rasterizer_hpp
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index 6a5195d..030ab6b 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -30,7 +30,6 @@
 
 #include <malloc.h>
 #include <assert.h>
-#include <float.h>
 
 #undef max
 
@@ -55,6 +54,10 @@
 	extern Context::TransparencyAntialiasing transparencyAntialiasing;
 	extern bool forceClearRegisters;
 
+	extern bool precacheVertex;
+	extern bool precacheSetup;
+	extern bool precachePixel;
+
 	int batchSize = 128;
 	int threadCount = 1;
 	int unitCount = 1;
@@ -204,8 +207,8 @@
 		updateConfiguration();
 		updateClipper();
 
-		int ss = context->renderTarget[0]->getSuperSampleCount();
-		int ms = context->renderTarget[0]->getMultiSampleCount();
+		int ss = context->getSuperSampleCount();
+		int ms = context->getMultiSampleCount();
 
 		for(int q = 0; q < ss; q++)
 		{
@@ -287,7 +290,7 @@
 			{
 				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
 				{
-					InterlockedIncrement((volatile long*)&(*query)->reference);
+					atomicIncrement(&(*query)->reference);
 				}
 
 				draw->queries = new std::list<Query*>(queries);
@@ -607,7 +610,8 @@
 
 		if(logPrecision < IEEE)
 		{
-			_controlfp(_DN_FLUSH, _MCW_DN);
+			CPUID::setFlushToZero(true);
+			CPUID::setDenormalsAreZero(true);
 		}
 
 		renderer->threadLoop(threadIndex);
@@ -847,11 +851,11 @@
 			pixelProgress[cluster].processedPrimitives = 0;
 		}
 
-		int ref = InterlockedDecrement((volatile long*)&primitiveProgress[unit].references);
+		int ref = atomicDecrement(&primitiveProgress[unit].references);
 
 		if(ref == 0)
 		{
-			ref = InterlockedDecrement((volatile long*)&draw.references);
+			ref = atomicDecrement(&draw.references);
 
 			if(ref == 0)
 			{
@@ -873,10 +877,10 @@
 
 						for(int cluster = 0; cluster < clusterCount; cluster++)
 						{
-							InterlockedExchangeAdd((volatile long*)&query->data, data.occlusion[cluster]);
+							atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
 						}
 
-						InterlockedDecrement((volatile long*)&query->reference);
+						atomicDecrement(&query->reference);
 					}
 
 					delete draw.queries;
@@ -1178,37 +1182,37 @@
 			break;
 		case Context::DRAW_INDEXEDLINELOOP8:
 			{
-				const unsigned char *index = (const unsigned char*)indices + start;
+				const unsigned char *index = (const unsigned char*)indices;
 
 				for(unsigned int i = 0; i < count; i++)
 				{
-					batch[i][0] = index[(i + 0) % loop];
-					batch[i][1] = index[(i + 1) % loop];
-					batch[i][2] = index[(i + 1) % loop];
+					batch[i][0] = index[(start + i + 0) % loop];
+					batch[i][1] = index[(start + i + 1) % loop];
+					batch[i][2] = index[(start + i + 1) % loop];
 				}
 			}
 			break;
 		case Context::DRAW_INDEXEDLINELOOP16:
 			{
-				const unsigned short *index = (const unsigned short*)indices + start;
+				const unsigned short *index = (const unsigned short*)indices;
 
 				for(unsigned int i = 0; i < count; i++)
 				{
-					batch[i][0] = index[(i + 0) % loop];
-					batch[i][1] = index[(i + 1) % loop];
-					batch[i][2] = index[(i + 1) % loop];
+					batch[i][0] = index[(start + i + 0) % loop];
+					batch[i][1] = index[(start + i + 1) % loop];
+					batch[i][2] = index[(start + i + 1) % loop];
 				}
 			}
 			break;
 		case Context::DRAW_INDEXEDLINELOOP32:
 			{
-				const unsigned int *index = (const unsigned int*)indices + start;
+				const unsigned int *index = (const unsigned int*)indices;
 
 				for(unsigned int i = 0; i < count; i++)
 				{
-					batch[i][0] = index[(i + 0) % loop];
-					batch[i][1] = index[(i + 1) % loop];
-					batch[i][2] = index[(i + 1) % loop];
+					batch[i][0] = index[(start + i + 0) % loop];
+					batch[i][1] = index[(start + i + 1) % loop];
+					batch[i][2] = index[(start + i + 1) % loop];
 				}
 			}
 			break;
@@ -1421,9 +1425,9 @@
 			for(int i = 0; i < 2; i++)
 			{
 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
-				triangle[1].v1.C[i] = triangle[0].v1.C[i];
+				triangle[1].v1.C[i] = triangle[0].v0.C[i];
 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
-				triangle[2].v1.C[i] = triangle[0].v1.C[i];
+				triangle[2].v1.C[i] = triangle[0].v0.C[i];
 			}
 		}
 
@@ -2426,6 +2430,10 @@
 			SwiftConfig::Configuration configuration = {0};
 			swiftConfig->getConfiguration(configuration);
 
+			precacheVertex = !newConfiguration && configuration.precache;
+			precacheSetup = !newConfiguration && configuration.precache;
+			precachePixel = !newConfiguration && configuration.precache;
+
 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
diff --git a/src/Renderer/Renderer.hpp b/src/Renderer/Renderer.hpp
index 2bdcdb9..b0e76a3 100644
--- a/src/Renderer/Renderer.hpp
+++ b/src/Renderer/Renderer.hpp
@@ -1,433 +1,433 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2012 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_Renderer_hpp

-#define sw_Renderer_hpp

-

-#include "VertexProcessor.hpp"

-#include "PixelProcessor.hpp"

-#include "SetupProcessor.hpp"

-#include "Plane.hpp"

-#include "Blitter.hpp"

-#include "Common/MutexLock.hpp"

-#include "Common/Thread.hpp"

-#include "Main/Config.hpp"

-

-#include <list>

-

-namespace sw

-{

-	class Clipper;

-	class PixelShader;

-	class VertexShader;

-	class SwiftConfig;

-	struct Task;

-	class Resource;

-	class Renderer;

-

-	extern int batchSize;

-	extern int threadCount;

-	extern int unitCount;

-	extern int clusterCount;

-

-	enum TranscendentalPrecision

-	{

-		APPROXIMATE,

-		PARTIAL,	// 2^-10

-		ACCURATE,

-		WHQL,		// 2^-21

-		IEEE		// 2^-23

-	};

-

-	extern TranscendentalPrecision logPrecision;

-	extern TranscendentalPrecision expPrecision;

-	extern TranscendentalPrecision rcpPrecision;

-	extern TranscendentalPrecision rsqPrecision;

-	extern bool perspectiveCorrection;

-

-	struct Query

-	{

-		Query()

-		{

-			building = false;

-			reference = 0;

-			data = 0;

-		}

-

-		void begin()

-		{

-			building = true;

-			data = 0;

-		}

-

-		void end()

-		{

-			building = false;

-		}

-

-		bool building;

-		volatile int reference;

-		volatile unsigned int data;

-	};

-

-	struct DrawData

-	{

-		const void *constants;

-

-		const void *input[16];

-		unsigned int stride[16];

-		Texture mipmap[16 + 4];

-		const void *indices;

-

-		struct VS

-		{

-			float4 c[256 + 1];   // One extra for indices out of range, c[256] = {0, 0, 0, 0}

-			int4 i[16];

-			bool b[16];

-		};

-

-		struct PS

-		{

-			word4 cW[8][4];

-			float4 c[224];

-			int4 i[16];

-			bool b[16];

-		};

-

-		union

-		{

-			VS vs;

-			VertexProcessor::FixedFunction ff;

-		};

-

-		PS ps;

-

-		VertexProcessor::PointSprite point;

-

-		PixelProcessor::Stencil stencil[2];   // clockwise, counterclockwise

-		PixelProcessor::Stencil stencilCCW;

-		PixelProcessor::Fog fog;

-		PixelProcessor::Factor factor;

-		unsigned int occlusion[16];   // Number of pixels passing depth test

-

-		#if PERF_PROFILE

-			int64_t cycles[PERF_TIMERS][16];

-		#endif

-

-		TextureStage::Uniforms textureStage[8];

-

-		float4 Wx16;

-		float4 Hx16;

-		float4 X0x16;

-		float4 Y0x16;

-		float4 XXXX;

-		float4 YYYY;

-		float4 halfPixelX;

-		float4 halfPixelY;

-		float viewportHeight;

-		float slopeDepthBias;

-		float depthRange;

-		float depthNear;

-		Plane clipPlane[6];

-

-		unsigned int *colorBuffer[4];

-		int colorPitchB[4];

-		int colorSliceB[4];

-		float *depthBuffer;

-		int depthPitchB;

-		int depthSliceB;

-		unsigned char *stencilBuffer;

-		int stencilPitchB;

-		int stencilSliceB;

-

-		int scissorX0;

-		int scissorX1;

-		int scissorY0;

-		int scissorY1;

-

-		float4 a2c0;

-		float4 a2c1;

-		float4 a2c2;

-		float4 a2c3;

-	};

-

-	struct DrawCall

-	{

-		DrawCall();

-

-		~DrawCall();

-

-		Context::DrawType drawType;

-		int batchSize;

-

-		Routine *vertexRoutine;

-		Routine *setupRoutine;

-		Routine *pixelRoutine;

-

-		VertexProcessor::RoutinePointer vertexPointer;

-		SetupProcessor::RoutinePointer setupPointer;

-		PixelProcessor::RoutinePointer pixelPointer;

-

-		int (*setupPrimitives)(Renderer *renderer, int batch, int count);

-		SetupProcessor::State setupState;

-

-		Resource *vertexStream[16];

-		Resource *indexBuffer;

-		Surface *renderTarget[4];

-		Surface *depthStencil;

-		Resource *texture[16 + 4];

-

-		int vsDirtyConstF;

-		int vsDirtyConstI;

-		int vsDirtyConstB;

-

-		int psDirtyConstF;

-		int psDirtyConstI;

-		int psDirtyConstB;

-

-		std::list<Query*> *queries;

-

-		int clipFlags;

-

-		volatile int primitive;    // Current primitive to enter pipeline

-		volatile int count;        // Number of primitives to render

-		volatile int references;   // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free

-

-		DrawData *data;

-	};

-

-	struct Viewport

-	{

-		float x0;

-		float y0;

-		float width;

-		float height;

-		float minZ;

-		float maxZ;

-	};

-

-	class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor

-	{

-		struct Task

-		{

-			enum Type

-			{

-				PRIMITIVES,

-				PIXELS,

-

-				RESUME,

-				SUSPEND

-			};

-

-			volatile Type type;

-			volatile int primitiveUnit;

-			volatile int pixelCluster;

-		};

-

-		struct PrimitiveProgress

-		{

-			void init()

-			{

-				drawCall = 0;

-				firstPrimitive = 0;

-				primitiveCount = 0;

-				visible = 0;

-				references = 0;

-			}

-

-			volatile int drawCall;

-			volatile int firstPrimitive;

-			volatile int primitiveCount;

-			volatile int visible;

-			volatile int references;

-		};

-

-		struct PixelProgress

-		{

-			void init()

-			{

-				drawCall = 0;

-				processedPrimitives = 0;

-				executing = false;

-			}

-

-			volatile int drawCall;

-			volatile int processedPrimitives;

-			volatile bool executing;

-		};

-

-	public:

-		Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding);

-

-		virtual ~Renderer();

-

-		virtual void blit(Surface *source, const Rect &sRect, Surface *dest, const Rect &dRect, bool filter);

-		virtual void draw(Context::DrawType drawType, unsigned int indexOffset, unsigned int count, bool update = true);

-

-		virtual void setIndexBuffer(Resource *indexBuffer);

-

-		virtual void setMultiSampleMask(unsigned int mask);

-		virtual void setTransparencyAntialiasing(Context::TransparencyAntialiasing transparencyAntialiasing);

-

-		virtual void setTextureResource(unsigned int sampler, Resource *resource);

-		virtual void setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type);

-

-		virtual void setTextureFilter(SamplerType type, int sampler, FilterType textureFilter);

-		virtual void setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter);

-		virtual void setGatherEnable(SamplerType type, int sampler, bool enable);

-		virtual void setAddressingModeU(SamplerType type, int sampler, AddressingMode addressingMode);

-		virtual void setAddressingModeV(SamplerType type, int sampler, AddressingMode addressingMode);

-		virtual void setAddressingModeW(SamplerType type, int sampler, AddressingMode addressingMode);

-		virtual void setReadSRGB(SamplerType type, int sampler, bool sRGB);

-		virtual void setMipmapLOD(SamplerType type, int sampler, float bias);

-		virtual void setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor);

-		virtual void setMaxAnisotropy(SamplerType type, int sampler, unsigned int maxAnisotropy);

-		

-		virtual void setPointSpriteEnable(bool pointSpriteEnable);

-		virtual void setPointScaleEnable(bool pointScaleEnable);

-

-		virtual void setDepthBias(float bias);

-		virtual void setSlopeDepthBias(float slopeBias);

-

-		// Programmable pipelines

-		virtual void setPixelShader(const PixelShader *shader);

-		virtual void setVertexShader(const VertexShader *shader);

-

-		virtual void setPixelShaderConstantF(int index, const float value[4], int count = 1);

-		virtual void setPixelShaderConstantI(int index, const int value[4], int count = 1);

-		virtual void setPixelShaderConstantB(int index, const int *boolean, int count = 1);

-

-		virtual void setVertexShaderConstantF(int index, const float value[4], int count = 1);

-		virtual void setVertexShaderConstantI(int index, const int value[4], int count = 1);

-		virtual void setVertexShaderConstantB(int index, const int *boolean, int count = 1);

-

-		// Viewport & Clipper

-		virtual void setViewport(const Viewport &viewport);

-		virtual void setScissor(const Rect &scissor);

-		virtual void setClipFlags(int flags);

-		virtual void setClipPlane(unsigned int index, const float plane[4]);

-

-		// Partial transform

-		virtual void setModelMatrix(const Matrix &M, int i = 0);

-		virtual void setViewMatrix(const Matrix &V);

-		virtual void setBaseMatrix(const Matrix &B);

-		virtual void setProjectionMatrix(const Matrix &P);

-

-		virtual void addQuery(Query *query);

-		virtual void removeQuery(Query *query);

-

-		void synchronize();

-

-		#if PERF_HUD

-			// Performance timers

-			int getThreadCount();

-			int64_t getVertexTime(int thread);

-			int64_t getSetupTime(int thread);

-			int64_t getPixelTime(int thread);

-			void resetTimers();

-		#endif

-

-	private:

-		static void threadFunction(void *parameters);

-		void threadLoop(int threadIndex);

-		void taskLoop(int threadIndex);

-		void findAvailableTasks();

-		void scheduleTask(int threadIndex);

-		void executeTask(int threadIndex);

-		void finishRendering(Task &pixelTask);

-

-		void processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread);

-

-		static int setupSolidTriangles(Renderer *renderer, int batch, int count);

-		static int setupWireframeTriangle(Renderer *renderer, int batch, int count);

-		static int setupVertexTriangle(Renderer *renderer, int batch, int count);

-		static int setupLines(Renderer *renderer, int batch, int count);

-		static int setupPoints(Renderer *renderer, int batch, int count);

-

-		static bool setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw);

-		static bool setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw);

-

-		bool isReadWriteTexture(int sampler);

-		void updateClipper();

-		void updateConfiguration(bool initialUpdate = false);

-		static unsigned int computeClipFlags(const float4 &v, const DrawData &data);

-		void initializeThreads(int threadCount);

-		void terminateThreads();

-		void deleteBatches();

-

-		void loadConstants(const VertexShader *vertexShader);

-		void loadConstants(const PixelShader *pixelShader);

-

-		Context *context;

-		Clipper *clipper;

-		Viewport viewport;

-		Rect scissor;

-		int clipFlags;

-

-		Triangle *triangleBatch[16];

-		Primitive *primitiveBatch[16];

-

-		// User-defined clipping planes

-		Plane userPlane[6];

-		Plane clipPlane[6];   // Tranformed to clip space

-		bool updateClipPlanes;

-

-		volatile bool exitThreads;

-		volatile int threadsAwake;

-		Thread *worker[16];

-		Event *resume[16];         // Events for resuming threads

-		Event *suspend[16];        // Events for suspending threads

-		Event *resumeApp;          // Event for resuming the application thread

-

-		PrimitiveProgress primitiveProgress[16];

-		PixelProgress pixelProgress[16];

-		Task task[16];   // Current tasks for threads

-

-		enum {DRAW_COUNT = 16};   // Number of draw calls buffered

-		DrawCall *drawCall[DRAW_COUNT];

-		DrawCall *drawList[DRAW_COUNT];

-

-		volatile int currentDraw;

-		volatile int nextDraw;

-

-		Task taskQueue[32];

-		unsigned int qHead;

-		unsigned int qSize;

-

-		BackoffLock mutex;

-

-		#if PERF_HUD

-			int64_t vertexTime[16];

-			int64_t setupTime[16];

-			int64_t pixelTime[16];

-		#endif

-

-		VertexTask *vertexTask[16];

-

-		SwiftConfig *swiftConfig;

-

-		std::list<Query*> queries;

-		Resource *sync;

-

-		VertexProcessor::State vertexState;

-		SetupProcessor::State setupState;

-		PixelProcessor::State pixelState;

-		int (*setupPrimitives)(Renderer *renderer, int batch, int count);

-

-		Routine *vertexRoutine;

-		Routine *setupRoutine;

-		Routine *pixelRoutine;

-

-		Blitter blitter;

-	};

-}

-

-#endif   // sw_Renderer_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2012 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_Renderer_hpp
+#define sw_Renderer_hpp
+
+#include "VertexProcessor.hpp"
+#include "PixelProcessor.hpp"
+#include "SetupProcessor.hpp"
+#include "Plane.hpp"
+#include "Blitter.hpp"
+#include "Common/MutexLock.hpp"
+#include "Common/Thread.hpp"
+#include "Main/Config.hpp"
+
+#include <list>
+
+namespace sw
+{
+	class Clipper;
+	class PixelShader;
+	class VertexShader;
+	class SwiftConfig;
+	struct Task;
+	class Resource;
+	class Renderer;
+
+	extern int batchSize;
+	extern int threadCount;
+	extern int unitCount;
+	extern int clusterCount;
+
+	enum TranscendentalPrecision   // Value type of the logPrecision/expPrecision/rcpPrecision/rsqPrecision settings
+	{
+		APPROXIMATE,
+		PARTIAL,	// 2^-10 (presumably an error bound - TODO confirm)
+		ACCURATE,
+		WHQL,		// 2^-21
+		IEEE		// 2^-23
+	};
+
+	extern TranscendentalPrecision logPrecision;
+	extern TranscendentalPrecision expPrecision;
+	extern TranscendentalPrecision rcpPrecision;
+	extern TranscendentalPrecision rsqPrecision;
+	extern bool perspectiveCorrection;
+
+	struct Query   // Result slot handed to Renderer::addQuery() / removeQuery()
+	{
+		Query()   // Starts idle with reference and data both zero
+		{
+			building = false;
+			reference = 0;
+			data = 0;
+		}
+
+		void begin()   // Marks the query active and discards any earlier result
+		{
+			building = true;
+			data = 0;
+		}
+
+		void end()   // Marks the query inactive; data is left untouched
+		{
+			building = false;
+		}
+
+		bool building;   // True between begin() and end()
+		volatile int reference;   // NOTE(review): looks like a count of pending users - confirm in Renderer.cpp
+		volatile unsigned int data;   // Query result; presumably the occlusion pixel count - TODO confirm
+	};
+
+	struct DrawData   // Plain-data state for one draw; DrawCall::data points to an instance
+	{
+		const void *constants;
+
+		const void *input[16];   // Same element count as DrawCall::vertexStream
+		unsigned int stride[16];
+		Texture mipmap[16 + 4];   // Same element count as DrawCall::texture
+		const void *indices;
+
+		struct VS   // Float, integer and boolean constant arrays; presumably the vertex shader's - confirm
+		{
+			float4 c[256 + 1];   // One extra for indices out of range, c[256] = {0, 0, 0, 0}
+			int4 i[16];
+			bool b[16];
+		};
+
+		struct PS   // Float, integer and boolean constant arrays; presumably the pixel shader's - confirm
+		{
+			word4 cW[8][4];
+			float4 c[224];
+			int4 i[16];
+			bool b[16];
+		};
+
+		union   // vs and ff overlap in memory, so only one of them is meaningful at a time
+		{
+			VS vs;
+			VertexProcessor::FixedFunction ff;
+		};
+
+		PS ps;
+
+		VertexProcessor::PointSprite point;
+
+		PixelProcessor::Stencil stencil[2];   // clockwise, counterclockwise
+		PixelProcessor::Stencil stencilCCW;   // NOTE(review): stencil[2] above already has a counterclockwise slot - confirm both are used
+		PixelProcessor::Fog fog;
+		PixelProcessor::Factor factor;
+		unsigned int occlusion[16];   // Number of pixels passing depth test
+
+		#if PERF_PROFILE
+			int64_t cycles[PERF_TIMERS][16];
+		#endif
+
+		TextureStage::Uniforms textureStage[8];
+
+		float4 Wx16;
+		float4 Hx16;
+		float4 X0x16;
+		float4 Y0x16;
+		float4 XXXX;
+		float4 YYYY;
+		float4 halfPixelX;
+		float4 halfPixelY;
+		float viewportHeight;
+		float slopeDepthBias;
+		float depthRange;
+		float depthNear;
+		Plane clipPlane[6];   // Same element count as Renderer::clipPlane
+
+		unsigned int *colorBuffer[4];   // Same element count as DrawCall::renderTarget
+		int colorPitchB[4];
+		int colorSliceB[4];
+		float *depthBuffer;
+		int depthPitchB;
+		int depthSliceB;
+		unsigned char *stencilBuffer;
+		int stencilPitchB;
+		int stencilSliceB;
+
+		int scissorX0;
+		int scissorX1;
+		int scissorY0;
+		int scissorY1;
+
+		float4 a2c0;
+		float4 a2c1;
+		float4 a2c2;
+		float4 a2c3;
+	};
+
+	struct DrawCall   // Record for one buffered draw; Renderer holds DRAW_COUNT pointers to these
+	{
+		DrawCall();
+
+		~DrawCall();
+
+		Context::DrawType drawType;
+		int batchSize;
+
+		Routine *vertexRoutine;
+		Routine *setupRoutine;
+		Routine *pixelRoutine;
+
+		VertexProcessor::RoutinePointer vertexPointer;
+		SetupProcessor::RoutinePointer setupPointer;
+		PixelProcessor::RoutinePointer pixelPointer;
+
+		int (*setupPrimitives)(Renderer *renderer, int batch, int count);   // Same signature as Renderer's static setup*() functions
+		SetupProcessor::State setupState;
+
+		Resource *vertexStream[16];
+		Resource *indexBuffer;
+		Surface *renderTarget[4];
+		Surface *depthStencil;
+		Resource *texture[16 + 4];
+
+		int vsDirtyConstF;
+		int vsDirtyConstI;
+		int vsDirtyConstB;
+
+		int psDirtyConstF;
+		int psDirtyConstI;
+		int psDirtyConstB;
+
+		std::list<Query*> *queries;   // A pointer to a list, unlike Renderer::queries which is held by value
+
+		int clipFlags;
+
+		volatile int primitive;    // Current primitive to enter pipeline
+		volatile int count;        // Number of primitives to render
+		volatile int references;   // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
+
+		DrawData *data;
+	};
+
+	struct Viewport   // Argument type of Renderer::setViewport()
+	{
+		float x0;
+		float y0;
+		float width;
+		float height;
+		float minZ;
+		float maxZ;
+	};
+
+	class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
+	{
+		struct Task   // Nested type; distinct from the sw::Task forward-declared at namespace scope
+		{
+			enum Type
+			{
+				PRIMITIVES,
+				PIXELS,
+
+				RESUME,
+				SUSPEND
+			};
+
+			volatile Type type;
+			volatile int primitiveUnit;
+			volatile int pixelCluster;
+		};
+
+		struct PrimitiveProgress
+		{
+			void init()   // Resets every counter to zero
+			{
+				drawCall = 0;
+				firstPrimitive = 0;
+				primitiveCount = 0;
+				visible = 0;
+				references = 0;
+			}
+
+			volatile int drawCall;
+			volatile int firstPrimitive;
+			volatile int primitiveCount;
+			volatile int visible;
+			volatile int references;
+		};
+
+		struct PixelProgress
+		{
+			void init()   // Resets both counters and clears the executing flag
+			{
+				drawCall = 0;
+				processedPrimitives = 0;
+				executing = false;
+			}
+
+			volatile int drawCall;
+			volatile int processedPrimitives;
+			volatile bool executing;
+		};
+
+	public:
+		Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding);
+
+		virtual ~Renderer();
+
+		virtual void blit(Surface *source, const Rect &sRect, Surface *dest, const Rect &dRect, bool filter);
+		virtual void draw(Context::DrawType drawType, unsigned int indexOffset, unsigned int count, bool update = true);
+
+		virtual void setIndexBuffer(Resource *indexBuffer);
+
+		virtual void setMultiSampleMask(unsigned int mask);
+		virtual void setTransparencyAntialiasing(Context::TransparencyAntialiasing transparencyAntialiasing);
+
+		virtual void setTextureResource(unsigned int sampler, Resource *resource);
+		virtual void setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type);
+
+		virtual void setTextureFilter(SamplerType type, int sampler, FilterType textureFilter);
+		virtual void setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter);
+		virtual void setGatherEnable(SamplerType type, int sampler, bool enable);
+		virtual void setAddressingModeU(SamplerType type, int sampler, AddressingMode addressingMode);
+		virtual void setAddressingModeV(SamplerType type, int sampler, AddressingMode addressingMode);
+		virtual void setAddressingModeW(SamplerType type, int sampler, AddressingMode addressingMode);
+		virtual void setReadSRGB(SamplerType type, int sampler, bool sRGB);
+		virtual void setMipmapLOD(SamplerType type, int sampler, float bias);
+		virtual void setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor);
+		virtual void setMaxAnisotropy(SamplerType type, int sampler, unsigned int maxAnisotropy);
+		
+		virtual void setPointSpriteEnable(bool pointSpriteEnable);
+		virtual void setPointScaleEnable(bool pointScaleEnable);
+
+		virtual void setDepthBias(float bias);
+		virtual void setSlopeDepthBias(float slopeBias);
+
+		// Programmable pipelines
+		virtual void setPixelShader(const PixelShader *shader);
+		virtual void setVertexShader(const VertexShader *shader);
+
+		virtual void setPixelShaderConstantF(int index, const float value[4], int count = 1);
+		virtual void setPixelShaderConstantI(int index, const int value[4], int count = 1);
+		virtual void setPixelShaderConstantB(int index, const int *boolean, int count = 1);
+
+		virtual void setVertexShaderConstantF(int index, const float value[4], int count = 1);
+		virtual void setVertexShaderConstantI(int index, const int value[4], int count = 1);
+		virtual void setVertexShaderConstantB(int index, const int *boolean, int count = 1);
+
+		// Viewport & Clipper
+		virtual void setViewport(const Viewport &viewport);
+		virtual void setScissor(const Rect &scissor);
+		virtual void setClipFlags(int flags);
+		virtual void setClipPlane(unsigned int index, const float plane[4]);
+
+		// Partial transform
+		virtual void setModelMatrix(const Matrix &M, int i = 0);
+		virtual void setViewMatrix(const Matrix &V);
+		virtual void setBaseMatrix(const Matrix &B);
+		virtual void setProjectionMatrix(const Matrix &P);
+
+		virtual void addQuery(Query *query);
+		virtual void removeQuery(Query *query);
+
+		void synchronize();
+
+		#if PERF_HUD
+			// Performance timers
+			int getThreadCount();
+			int64_t getVertexTime(int thread);
+			int64_t getSetupTime(int thread);
+			int64_t getPixelTime(int thread);
+			void resetTimers();
+		#endif
+
+	private:
+		static void threadFunction(void *parameters);
+		void threadLoop(int threadIndex);
+		void taskLoop(int threadIndex);
+		void findAvailableTasks();
+		void scheduleTask(int threadIndex);
+		void executeTask(int threadIndex);
+		void finishRendering(Task &pixelTask);
+
+		void processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread);
+
+		static int setupSolidTriangles(Renderer *renderer, int batch, int count);
+		static int setupWireframeTriangle(Renderer *renderer, int batch, int count);
+		static int setupVertexTriangle(Renderer *renderer, int batch, int count);
+		static int setupLines(Renderer *renderer, int batch, int count);
+		static int setupPoints(Renderer *renderer, int batch, int count);
+
+		static bool setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw);
+		static bool setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw);
+
+		bool isReadWriteTexture(int sampler);
+		void updateClipper();
+		void updateConfiguration(bool initialUpdate = false);
+		static unsigned int computeClipFlags(const float4 &v, const DrawData &data);
+		void initializeThreads(int threadCount);
+		void terminateThreads();
+		void deleteBatches();
+
+		void loadConstants(const VertexShader *vertexShader);
+		void loadConstants(const PixelShader *pixelShader);
+
+		Context *context;
+		Clipper *clipper;
+		Viewport viewport;
+		Rect scissor;
+		int clipFlags;
+
+		Triangle *triangleBatch[16];
+		Primitive *primitiveBatch[16];
+
+		// User-defined clipping planes
+		Plane userPlane[6];
+		Plane clipPlane[6];   // Transformed to clip space
+		bool updateClipPlanes;
+
+		volatile bool exitThreads;
+		volatile int threadsAwake;
+		Thread *worker[16];
+		Event *resume[16];         // Events for resuming threads
+		Event *suspend[16];        // Events for suspending threads
+		Event *resumeApp;          // Event for resuming the application thread
+
+		PrimitiveProgress primitiveProgress[16];
+		PixelProgress pixelProgress[16];
+		Task task[16];   // Current tasks for threads
+
+		enum {DRAW_COUNT = 16};   // Number of draw calls buffered
+		DrawCall *drawCall[DRAW_COUNT];
+		DrawCall *drawList[DRAW_COUNT];
+
+		volatile int currentDraw;
+		volatile int nextDraw;
+
+		Task taskQueue[32];
+		unsigned int qHead;
+		unsigned int qSize;
+
+		BackoffLock mutex;
+
+		#if PERF_HUD
+			int64_t vertexTime[16];
+			int64_t setupTime[16];
+			int64_t pixelTime[16];
+		#endif
+
+		VertexTask *vertexTask[16];
+
+		SwiftConfig *swiftConfig;
+
+		std::list<Query*> queries;
+		Resource *sync;
+
+		VertexProcessor::State vertexState;
+		SetupProcessor::State setupState;
+		PixelProcessor::State pixelState;
+		int (*setupPrimitives)(Renderer *renderer, int batch, int count);   // Signature matches the static setup*() functions above
+
+		Routine *vertexRoutine;
+		Routine *setupRoutine;
+		Routine *pixelRoutine;
+
+		Blitter blitter;
+	};
+}
+
+#endif   // sw_Renderer_hpp
diff --git a/src/Renderer/RoutineCache.hpp b/src/Renderer/RoutineCache.hpp
new file mode 100644
index 0000000..5e421e8
--- /dev/null
+++ b/src/Renderer/RoutineCache.hpp
@@ -0,0 +1,172 @@
+// SwiftShader Software Renderer

+//

+// Copyright(c) 2005-2012 TransGaming Inc.

+//

+// All rights reserved. No part of this software may be copied, distributed, transmitted,

+// transcribed, stored in a retrieval system, translated into any human or computer

+// language by any means, or disclosed to third parties without the explicit written

+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

+// or implied, including but not limited to any patent rights, are granted to you.

+//

+

+#ifndef sw_RoutineCache_hpp

+#define sw_RoutineCache_hpp

+

+#include "LRUCache.hpp"

+

+#include "Reactor/Reactor.hpp"

+

+namespace sw

+{

+	template<class State>
+	class RoutineCache : public LRUCache<State, Routine>   // LRUCache of Routine objects with optional Windows-only load/save to "<precache>.dll" and "<precache>.dir"
+	{
+	public:
+		RoutineCache(int n, const char *precache = 0);   // n is forwarded to LRUCache; precache is the file name stem, 0 for none
+		~RoutineCache();
+
+	private:
+		const char *precache;   // Stored as passed in, not copied
+		#if defined(_WIN32)
+		HMODULE precacheDLL;   // Result of LoadLibrary() in the constructor, 0 when nothing was loaded
+		#endif
+	};

+}

+

+#if defined(_WIN32)

+	#include "Shader/Constants.hpp"

+	#include "Reactor/DLL.hpp"

+#endif

+

+namespace sw

+{

+	template<class State>   // Constructor: on Windows, preloads routines from "<precache>.dll" using the records in "<precache>.dir"
+	RoutineCache<State>::RoutineCache(int n, const char *precache) : LRUCache<State, Routine>(n), precache(precache)
+	{
+		#if defined(_WIN32)
+			precacheDLL = 0;
+
+			if(precache)   // No name means no preloading
+			{
+				char dllName[1024]; sprintf(dllName, "%s.dll", precache);
+				char dirName[1024]; sprintf(dirName, "%s.dir", precache);
+
+				precacheDLL = LoadLibrary(dllName);
+				FILE *dir = fopen(dirName, "rb");
+				int ordinal = 1;   // The n-th directory record is looked up as export ordinal n
+
+				while(precacheDLL && dir)   // Both files are needed; otherwise nothing is preloaded
+				{
+					State state;
+					int offset;
+					int size;
+
+					size_t bytes = fread(&state, 1, sizeof(State), dir);
+					bytes += fread(&offset, 1, sizeof(offset), dir);
+					bytes += fread(&size, 1, sizeof(size), dir);
+
+					if(bytes != sizeof(State) + sizeof(offset) + sizeof(size))   // End of file or truncated record
+					{
+						break;
+					}
+
+					void (*routine)(void) = (void(*)(void))GetProcAddress(precacheDLL, (const char*)(size_t)ordinal);   // Widen the ordinal before the pointer cast
+					ordinal++;
+
+					if(routine)
+					{
+						this->add(state, new Routine(routine, size, offset));   // this-> because add() lives in the dependent base class
+					}
+				}
+
+				if(dir)
+				{
+					fclose(dir);
+				}
+			}
+		#endif
+	}

+

+	template<class State>   // Destructor: on Windows, writes the cached routines back to "<precache>.dll" and "<precache>.dir"
+	RoutineCache<State>::~RoutineCache()
+	{
+		#if defined(_WIN32)
+			if(precache)   // Without a name nothing was loaded and there is nothing to write, so no file names are built
+			{
+				char dllName[1024]; sprintf(dllName, "%s.dll", precache);
+				char dirName[1024]; sprintf(dirName, "%s.dir", precache);
+
+				DLL dll(dllName, &constants, sizeof(Constants));
+				FILE *dir = fopen(dirName, "wb");   // May be 0; every use below is guarded
+
+				for(int i = 0; i < this->getSize(); i++)   // this-> because these are members of the dependent base class
+				{
+					State &state = this->getKey(i);
+					Routine *routine = this->query(state);
+
+					if(routine)
+					{
+						unsigned char *buffer = (unsigned char*)routine->getBuffer();
+						unsigned char *entry = (unsigned char*)routine->getEntry();
+						int size = routine->getBufferSize();
+						int codeSize = routine->getCodeSize();
+
+						#ifndef _M_AMD64
+							for(int j = 1; j < codeSize - 4; j++)   // Scan the code for 32-bit operands that point into this routine's buffer
+							{
+								unsigned char modRM_SIB = entry[j - 1];
+								unsigned int address = *(unsigned int*)&entry[j];
+
+								if((modRM_SIB & 0x05) == 0x05 && (address % 4) == 0)
+								{
+									if(address >= (unsigned int)buffer && address < (unsigned int)entry)   // Constant stored above the function entry
+									{
+										dll.addRelocation(buffer, &entry[j], true);
+
+										j += 4;
+									}
+								}
+							}
+						#else
+							for(int j = 1; j < codeSize - 8; j++)   // Bound leaves room for the 8-byte read below
+							{
+								unsigned char modRM_SIB = entry[j - 1];
+								uint64_t address = *(uint64_t*)&entry[j];
+
+							//	if((modRM_SIB & 0x05) == 0x05 && (address % 4) == 0)
+								{
+									if(address >= (uint64_t)buffer && address < (uint64_t)entry)   // Constant stored above the function entry
+									{
+										dll.addRelocation(buffer, &entry[j], true);
+
+										j += 4;
+									}
+								}
+							}
+						#endif
+
+						dll.addFunction(buffer, entry, size);
+						int offset = (int)(entry - buffer);
+
+						if(dir)   // Record layout read back by the constructor: state, entry offset, buffer size
+						{
+							fwrite(&state, 1, sizeof(State), dir);
+							fwrite(&offset, 1, sizeof(offset), dir);
+							fwrite(&size, 1, sizeof(size), dir);
+						}
+					}
+				}
+
+				FreeLibrary(precacheDLL);   // Kept ahead of emit(), as in the original ordering
+
+				dll.emit();
+				if(dir)
+				{
+					fclose(dir);
+				}
+			}
+		#endif
+	}

+}

+

+#endif   // sw_RoutineCache_hpp

diff --git a/src/Renderer/Sampler.hpp b/src/Renderer/Sampler.hpp
index 14d0487..2132a06 100644
--- a/src/Renderer/Sampler.hpp
+++ b/src/Renderer/Sampler.hpp
@@ -1,195 +1,195 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_Sampler_hpp

-#define sw_Sampler_hpp

-

-#include "Main/Config.hpp"

-#include "Renderer/Surface.hpp"

-

-namespace sw

-{

-	struct Mipmap

-	{

-		void *buffer[6];

-

-		union

-		{

-			struct

-			{

-				int64_t uInt;

-				int64_t vInt;

-				int64_t wInt;

-				int64_t uFrac;

-				int64_t vFrac;

-				int64_t wFrac;

-			};

-

-			struct

-			{

-				float4 fWidth;

-				float4 fHeight;

-				float4 fDepth;

-			};

-		};

-

-		short uHalf[4];

-		short vHalf[4];

-		short wHalf[4];

-		short width[4];

-		short height[4];

-		short depth[4];

-		short onePitchP[4];

-		int sliceP[2];

-	};

-

-	struct Texture

-	{

-		Mipmap mipmap[MIPMAP_LEVELS];

-

-		float LOD;

-		float4 widthHeightLOD;

-		float4 widthLOD;

-		float4 heightLOD;

-		float4 depthLOD;

-

-		word4 borderColor4[4];

-		float4 borderColorF[4];

-		float maxAnisotropy;

-	};

-

-	enum SamplerType

-	{

-		SAMPLER_PIXEL,

-		SAMPLER_VERTEX

-	};

-

-	enum TextureType

-	{

-		TEXTURE_NULL,

-		TEXTURE_2D,

-		TEXTURE_CUBE,

-		TEXTURE_3D,

-

-		TEXTURE_LAST = TEXTURE_3D

-	};

-

-	enum FilterType

-	{

-		FILTER_POINT,

-		FILTER_GATHER,

-		FILTER_LINEAR,

-		FILTER_ANISOTROPIC,

-

-		FILTER_LAST = FILTER_ANISOTROPIC

-	};

-

-	enum MipmapType

-	{

-		MIPMAP_NONE,

-		MIPMAP_POINT,

-		MIPMAP_LINEAR,

-		

-		MIPMAP_LAST = MIPMAP_LINEAR

-	};

-

-	enum AddressingMode

-	{

-		ADDRESSING_WRAP,

-		ADDRESSING_CLAMP,

-		ADDRESSING_MIRROR,

-		ADDRESSING_MIRRORONCE,

-		ADDRESSING_BORDER,

-

-		ADDRESSING_LAST = ADDRESSING_BORDER

-	};

-

-	class Sampler

-	{

-	public:

-		struct State

-		{

-			State();

-

-			unsigned int textureType     : BITS(TEXTURE_LAST);

-			unsigned int textureFormat   : BITS(FORMAT_LAST);

-			unsigned int textureFilter   : BITS(FILTER_LAST);

-			unsigned int addressingModeU : BITS(ADDRESSING_LAST);

-			unsigned int addressingModeV : BITS(ADDRESSING_LAST);

-			unsigned int addressingModeW : BITS(ADDRESSING_LAST);

-			unsigned int mipmapFilter    : BITS(FILTER_LAST);

-			unsigned int hasNPOTTexture	 : 1;

-			unsigned int sRGB            : 1;

-

-			#if PERF_PROFILE

-			bool compressedFormat        : 1;

-			#endif

-		};

-

-		Sampler();

-

-		~Sampler();

-

-		State samplerState() const;

-

-		void setTextureLevel(int face, int level, Surface *surface, TextureType type);

-

-		void setTextureFilter(FilterType textureFilter);

-		void setMipmapFilter(MipmapType mipmapFilter);

-		void setGatherEnable(bool enable);

-		void setAddressingModeU(AddressingMode addressingMode);

-		void setAddressingModeV(AddressingMode addressingMode);

-		void setAddressingModeW(AddressingMode addressingMode);

-		void setReadSRGB(bool sRGB);

-		void setBorderColor(const Color<float> &borderColor);

-		void setMaxAnisotropy(unsigned int maxAnisotropy);

-

-		static void setFilterQuality(FilterType maximumFilterQuality);

-		static void setMipmapQuality(MipmapType maximumFilterQuality);

-		void setMipmapLOD(float lod);

-

-		bool hasTexture() const;

-		bool hasUnsignedTexture() const;

-		bool hasCubeTexture() const;

-		bool hasVolumeTexture() const;

-

-		const Texture &getTextureData();

-

-	private:

-		MipmapType mipmapFilter() const;

-		bool hasNPOTTexture() const;

-		TextureType getTextureType() const;

-		FilterType getTextureFilter() const;

-		AddressingMode getAddressingModeU() const;

-		AddressingMode getAddressingModeV() const;

-		AddressingMode getAddressingModeW() const;

-

-		Format externalTextureFormat;

-		Format internalTextureFormat;

-		TextureType textureType;

-

-		FilterType textureFilter;

-		AddressingMode addressingModeU;

-		AddressingMode addressingModeV;

-		AddressingMode addressingModeW;

-		MipmapType mipmapFilterState;

-		bool sRGB;

-		bool gather;

-

-		Texture texture;

-		float exp2LOD;

-

-		static FilterType maximumTextureFilterQuality;

-		static MipmapType maximumMipmapFilterQuality;

-	};

-}

-

-#endif   // sw_Sampler_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_Sampler_hpp
+#define sw_Sampler_hpp
+
+#include "Main/Config.hpp"
+#include "Renderer/Surface.hpp"
+
+namespace sw
+{
+	struct Mipmap   // Per-level data; Texture holds MIPMAP_LEVELS of these
+	{
+		void *buffer[6];   // presumably one pointer per cube face - TODO confirm
+
+		union   // The integer fields and the float4 fields share the same storage
+		{
+			struct
+			{
+				int64_t uInt;
+				int64_t vInt;
+				int64_t wInt;
+				int64_t uFrac;
+				int64_t vFrac;
+				int64_t wFrac;
+			};
+
+			struct
+			{
+				float4 fWidth;
+				float4 fHeight;
+				float4 fDepth;
+			};
+		};
+
+		short uHalf[4];
+		short vHalf[4];
+		short wHalf[4];
+		short width[4];
+		short height[4];
+		short depth[4];
+		short onePitchP[4];
+		int sliceP[2];
+	};
+
+	struct Texture   // Data block returned by Sampler::getTextureData()
+	{
+		Mipmap mipmap[MIPMAP_LEVELS];
+
+		float LOD;
+		float4 widthHeightLOD;
+		float4 widthLOD;
+		float4 heightLOD;
+		float4 depthLOD;
+
+		word4 borderColor4[4];   // NOTE(review): looks like the integer form of borderColorF - confirm
+		float4 borderColorF[4];
+		float maxAnisotropy;
+	};
+
+	enum SamplerType   // presumably selects between pixel and vertex sampler sets - TODO confirm
+	{
+		SAMPLER_PIXEL,
+		SAMPLER_VERTEX
+	};
+
+	enum TextureType   // Passed to Sampler::setTextureLevel()
+	{
+		TEXTURE_NULL,
+		TEXTURE_2D,
+		TEXTURE_CUBE,
+		TEXTURE_3D,
+
+		TEXTURE_LAST = TEXTURE_3D   // Highest value; sizes the textureType bit-field in Sampler::State
+	};
+
+	enum FilterType   // Passed to Sampler::setTextureFilter() and setFilterQuality()
+	{
+		FILTER_POINT,
+		FILTER_GATHER,
+		FILTER_LINEAR,
+		FILTER_ANISOTROPIC,
+
+		FILTER_LAST = FILTER_ANISOTROPIC   // Highest value; sizes the textureFilter and mipmapFilter bit-fields in Sampler::State
+	};
+
+	enum MipmapType   // Passed to Sampler::setMipmapFilter() and setMipmapQuality()
+	{
+		MIPMAP_NONE,
+		MIPMAP_POINT,
+		MIPMAP_LINEAR,
+		
+		MIPMAP_LAST = MIPMAP_LINEAR   // Highest value
+	};
+
+	enum AddressingMode   // Passed to Sampler::setAddressingModeU/V/W()
+	{
+		ADDRESSING_WRAP,
+		ADDRESSING_CLAMP,
+		ADDRESSING_MIRROR,
+		ADDRESSING_MIRRORONCE,
+		ADDRESSING_BORDER,
+
+		ADDRESSING_LAST = ADDRESSING_BORDER   // Highest value; sizes the addressingMode bit-fields in Sampler::State
+	};
+
+	class Sampler
+	{
+	public:
+		struct State   // Bit-field summary returned by samplerState()
+		{
+			State();
+
+			unsigned int textureType     : BITS(TEXTURE_LAST);
+			unsigned int textureFormat   : BITS(FORMAT_LAST);
+			unsigned int textureFilter   : BITS(FILTER_LAST);
+			unsigned int addressingModeU : BITS(ADDRESSING_LAST);
+			unsigned int addressingModeV : BITS(ADDRESSING_LAST);
+			unsigned int addressingModeW : BITS(ADDRESSING_LAST);
+			unsigned int mipmapFilter    : BITS(FILTER_LAST);   // NOTE(review): sized with FILTER_LAST although MIPMAP_LAST exists - confirm intent
+			unsigned int hasNPOTTexture	 : 1;
+			unsigned int sRGB            : 1;
+
+			#if PERF_PROFILE
+			bool compressedFormat        : 1;
+			#endif
+		};
+
+		Sampler();
+
+		~Sampler();
+
+		State samplerState() const;
+
+		void setTextureLevel(int face, int level, Surface *surface, TextureType type);
+
+		void setTextureFilter(FilterType textureFilter);
+		void setMipmapFilter(MipmapType mipmapFilter);
+		void setGatherEnable(bool enable);
+		void setAddressingModeU(AddressingMode addressingMode);
+		void setAddressingModeV(AddressingMode addressingMode);
+		void setAddressingModeW(AddressingMode addressingMode);
+		void setReadSRGB(bool sRGB);
+		void setBorderColor(const Color<float> &borderColor);
+		void setMaxAnisotropy(unsigned int maxAnisotropy);
+
+		static void setFilterQuality(FilterType maximumFilterQuality);   // Static: not tied to one Sampler instance
+		static void setMipmapQuality(MipmapType maximumFilterQuality);   // Static: not tied to one Sampler instance
+		void setMipmapLOD(float lod);
+
+		bool hasTexture() const;
+		bool hasUnsignedTexture() const;
+		bool hasCubeTexture() const;
+		bool hasVolumeTexture() const;
+
+		const Texture &getTextureData();
+
+	private:
+		MipmapType mipmapFilter() const;
+		bool hasNPOTTexture() const;
+		TextureType getTextureType() const;
+		FilterType getTextureFilter() const;
+		AddressingMode getAddressingModeU() const;
+		AddressingMode getAddressingModeV() const;
+		AddressingMode getAddressingModeW() const;
+
+		Format externalTextureFormat;
+		Format internalTextureFormat;
+		TextureType textureType;
+
+		FilterType textureFilter;
+		AddressingMode addressingModeU;
+		AddressingMode addressingModeV;
+		AddressingMode addressingModeW;
+		MipmapType mipmapFilterState;   // Named differently from the mipmapFilter() accessor declared above
+		bool sRGB;
+		bool gather;
+
+		Texture texture;
+		float exp2LOD;
+
+		static FilterType maximumTextureFilterQuality;   // presumably set by setFilterQuality() - confirm in Sampler.cpp
+		static MipmapType maximumMipmapFilterQuality;   // presumably set by setMipmapQuality() - confirm in Sampler.cpp
+	};
+}
+
+#endif   // sw_Sampler_hpp
diff --git a/src/Renderer/SetupProcessor.cpp b/src/Renderer/SetupProcessor.cpp
index 775564e..65bc334 100644
--- a/src/Renderer/SetupProcessor.cpp
+++ b/src/Renderer/SetupProcessor.cpp
@@ -24,6 +24,8 @@
 	extern bool complementaryDepthBuffer;
 	extern bool fullPixelPositionRegister;
 
+	bool precacheSetup = false;
+
 	unsigned int SetupProcessor::States::computeHash()
 	{
 		unsigned int *state = (unsigned int*)this;
@@ -54,7 +56,6 @@
 
 	SetupProcessor::SetupProcessor(Context *context) : context(context)
 	{
-		precacheDLL = 0;
 		routineCache = 0;
 		setRoutineCacheSize(1024);
 	}
@@ -87,7 +88,7 @@
 		state.positionRegister = Pos;
 		state.pointSizeRegister = 0xF;   // No vertex point size
 
-		state.multiSample = context->renderTarget[0]->getMultiSampleCount();
+		state.multiSample = context->getMultiSampleCount();
 
 		if(context->vertexShader)
 		{
@@ -235,6 +236,6 @@
 	void SetupProcessor::setRoutineCacheSize(int cacheSize)
 	{
 		delete routineCache;
-		routineCache = new LRUCache<State, Routine>(clamp(cacheSize, 1, 65536));
+		routineCache = new RoutineCache<State>(clamp(cacheSize, 1, 65536), precacheSetup ? "sw-setup" : 0);
 	}
 }
diff --git a/src/Renderer/SetupProcessor.hpp b/src/Renderer/SetupProcessor.hpp
index 0537057..1e57940 100644
--- a/src/Renderer/SetupProcessor.hpp
+++ b/src/Renderer/SetupProcessor.hpp
@@ -13,7 +13,7 @@
 #define sw_SetupProcessor_hpp

 

 #include "Context.hpp"

-#include "LRUCache.hpp"

+#include "RoutineCache.hpp"

 #include "Shader/VertexShader.hpp"

 #include "Shader/PixelShader.hpp"

 #include "Common/Types.hpp"

@@ -24,7 +24,6 @@
 	struct Triangle;

 	struct Polygon;

 	struct Vertex;

-	class Routine;

 	struct DrawCall;

 	struct DrawData;

 

@@ -80,7 +79,7 @@
 			unsigned int hash;

 		};

 

-		typedef bool (__cdecl *RoutinePointer)(Primitive *primitive, const Triangle *triangle, const Polygon *polygon, const DrawData *draw);

+		typedef bool (*RoutinePointer)(Primitive *primitive, const Triangle *triangle, const Polygon *polygon, const DrawData *draw);

 

 		SetupProcessor(Context *context);

 

@@ -98,8 +97,7 @@
 	private:

 		Context *const context;

 

-		LRUCache<State, Routine> *routineCache;

-		HMODULE precacheDLL;

+		RoutineCache<State> *routineCache;

 	};

 }

 

diff --git a/src/Renderer/Stream.hpp b/src/Renderer/Stream.hpp
index b038a99..66ba633 100644
--- a/src/Renderer/Stream.hpp
+++ b/src/Renderer/Stream.hpp
@@ -1,98 +1,98 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2012 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_Stream_hpp

-#define sw_Stream_hpp

-

-#include "Common/Types.hpp"

-

-namespace sw

-{

-	class Resource;

-

-	enum StreamType

-	{

-		STREAMTYPE_COLOR,     // 4 normalized unsigned bytes, ZYXW order

-		STREAMTYPE_UDEC3,     // 3 unsigned 10-bit fields

-		STREAMTYPE_DEC3N,     // 3 normalized signed 10-bit fields

-		STREAMTYPE_INDICES,   // 4 unsigned bytes, stored unconverted into X component

-		STREAMTYPE_FLOAT,     // Normalization ignored

-		STREAMTYPE_BYTE,

-		STREAMTYPE_SBYTE,

-		STREAMTYPE_SHORT,

-		STREAMTYPE_USHORT,

-		STREAMTYPE_FIXED,     // Normalization ignored (16.16 format)

-		STREAMTYPE_HALF,      // Normalization ignored

-

-		STREAMTYPE_LAST = STREAMTYPE_HALF

-	};

-

-	struct StreamResource

-	{

-		Resource *resource;

-		const void *buffer;

-		unsigned int stride;

-	};

-

-	struct Stream : public StreamResource

-	{

-		Stream(Resource *resource = 0, const void *buffer = 0, unsigned int stride = 0)

-		{

-			this->resource = resource;

-			this->buffer = buffer;

-			this->stride = stride;

-		}

-

-		Stream &define(StreamType type, unsigned int count, bool normalized = false)

-		{

-			this->type = type;

-			this->count = count;

-			this->normalized = normalized;

-

-			return *this;

-		}

-

-		Stream &define(const void *buffer, StreamType type, unsigned int count, bool normalized = false)

-		{

-			this->buffer = buffer;

-			this->type = type;

-			this->count = count;

-			this->normalized = normalized;

-

-			return *this;

-		}

-

-		Stream &defaults()

-		{

-			static const float4 null = {0, 0, 0, 1};

-	

-			resource = 0;

-			buffer = &null;

-			stride = 0;

-			type = STREAMTYPE_FLOAT;

-			count = 0;

-			normalized = false;

-

-			return *this;

-		}

-

-		operator bool() const   // Returns true if stream contains data

-		{

-			return count != 0;

-		}

-

-		StreamType type;

-		unsigned char count;

-		bool normalized;

-	};

-}

-

-#endif   // sw_Stream_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2012 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef sw_Stream_hpp
+#define sw_Stream_hpp
+
+#include "Common/Types.hpp"
+
+namespace sw
+{
+	class Resource;
+
+	enum StreamType   // Element format of a Stream, set through Stream::define()
+	{
+		STREAMTYPE_COLOR,     // 4 normalized unsigned bytes, ZYXW order
+		STREAMTYPE_UDEC3,     // 3 unsigned 10-bit fields
+		STREAMTYPE_DEC3N,     // 3 normalized signed 10-bit fields
+		STREAMTYPE_INDICES,   // 4 unsigned bytes, stored unconverted into X component
+		STREAMTYPE_FLOAT,     // Normalization ignored
+		STREAMTYPE_BYTE,
+		STREAMTYPE_SBYTE,
+		STREAMTYPE_SHORT,
+		STREAMTYPE_USHORT,
+		STREAMTYPE_FIXED,     // Normalization ignored (16.16 format)
+		STREAMTYPE_HALF,      // Normalization ignored
+
+		STREAMTYPE_LAST = STREAMTYPE_HALF   // Highest value
+	};
+
+	struct StreamResource   // Base of Stream: the three fields the Stream constructor fills in
+	{
+		Resource *resource;
+		const void *buffer;
+		unsigned int stride;
+	};
+
+	struct Stream : public StreamResource
+	{
+		Stream(Resource *resource = 0, const void *buffer = 0, unsigned int stride = 0)   // NOTE(review): leaves type/count/normalized unset until define() or defaults() runs
+		{
+			this->resource = resource;
+			this->buffer = buffer;
+			this->stride = stride;
+		}
+
+		Stream &define(StreamType type, unsigned int count, bool normalized = false)   // Sets the element format only; returns *this for chaining
+		{
+			this->type = type;
+			this->count = count;   // Narrowed: the member is an unsigned char
+			this->normalized = normalized;
+
+			return *this;
+		}
+
+		Stream &define(const void *buffer, StreamType type, unsigned int count, bool normalized = false)   // As above, and also replaces the buffer pointer
+		{
+			this->buffer = buffer;
+			this->type = type;
+			this->count = count;   // Narrowed: the member is an unsigned char
+			this->normalized = normalized;
+
+			return *this;
+		}
+
+		Stream &defaults()   // Points the stream at a static {0, 0, 0, 1} vector with stride 0 and count 0
+		{
+			static const float4 null = {0, 0, 0, 1};
+	
+			resource = 0;
+			buffer = &null;
+			stride = 0;
+			type = STREAMTYPE_FLOAT;
+			count = 0;   // operator bool() below therefore reports false for a defaulted stream
+			normalized = false;
+
+			return *this;
+		}
+
+		operator bool() const   // Returns true if stream contains data
+		{
+			return count != 0;
+		}
+
+		StreamType type;
+		unsigned char count;
+		bool normalized;
+	};
+}
+
+#endif   // sw_Stream_hpp
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 8dc90ae..aadaf97 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer
 //
-// Copyright(c) 2005-2012 TransGaming Inc.
+// Copyright(c) 2005-2013 TransGaming Inc.
 //
 // All rights reserved. No part of this software may be copied, distributed, transmitted,
 // transcribed, stored in a retrieval system, translated into any human or computer
@@ -182,8 +182,8 @@
 			break;
 		case FORMAT_D32F:
 		case FORMAT_D32F_LOCKABLE:
-		case FORMAT_D32F_TEXTURE:
-		case FORMAT_D32F_SHADOW:
+		case FORMAT_D32FS8_TEXTURE:
+		case FORMAT_D32FS8_SHADOW:
 			*((float*)element) = color.r;
 			break;
 		case FORMAT_D32F_COMPLEMENTARY:
@@ -536,15 +536,15 @@
 			break;
 		case FORMAT_D32F:
 		case FORMAT_D32F_LOCKABLE:
-		case FORMAT_D32F_TEXTURE:
-		case FORMAT_D32F_SHADOW:
+		case FORMAT_D32FS8_TEXTURE:
+		case FORMAT_D32FS8_SHADOW:
 			r = *(float*)element;
 			g = r;
 			b = r;
 			a = r;
 			break;
 		case FORMAT_D32F_COMPLEMENTARY:
-			r = 1 - *(float*)element;
+			r = 1.0f - *(float*)element;
 			g = r;
 			b = r;
 			a = r;
@@ -655,13 +655,15 @@
 		{
 		#if S3TC_SUPPORT
 		case FORMAT_DXT1:
+		#endif
 		case FORMAT_ATI1:
 			return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
+		#if S3TC_SUPPORT
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
+		#endif
 		case FORMAT_ATI2:
 			return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
-		#endif
 		default:
 			return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
 		}
@@ -692,7 +694,6 @@
 		external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
 		external.lock = LOCK_UNLOCKED;
 		external.dirty = false;
-		external.paletteUsed = 0;
 
 		internal.buffer = 0;
 		internal.width = width;
@@ -706,7 +707,6 @@
 		internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
 		internal.lock = LOCK_UNLOCKED;
 		internal.dirty = false;
-		internal.paletteUsed = 0;
 
 		stencil.buffer = 0;
 		stencil.width = width;
@@ -720,9 +720,9 @@
 		stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
 		stencil.lock = LOCK_UNLOCKED;
 		stencil.dirty = false;
-		stencil.paletteUsed = 0;
 
 		dirtyMipmaps = true;
+		paletteUsed = 0;
 	}
 
 	Surface::~Surface()
@@ -772,6 +772,8 @@
 			{
 				update(external, internal);
 			}
+
+			internal.dirty = false;
 		}
 
 		switch(lock)
@@ -841,17 +843,15 @@
 			}
 		}
 
-		if(external.dirty)
+		if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
 		{
 			if(lock != LOCK_DISCARD)
 			{
 				update(internal, external);
 			}
-		}
 
-		if(isPalette(external.format) && internal.paletteUsed != Surface::paletteID)
-		{
-			update(internal, external);
+			external.dirty = false;
+			paletteUsed = Surface::paletteID;
 		}
 
 		switch(lock)
@@ -892,11 +892,6 @@
 			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
 		}
 
-		if(external.dirty)
-		{
-			update(stencil, external);   // FIXME: Only when not discarding
-		}
-
 		return stencil.lockRect(0, 0, front, LOCK_READWRITE);   // FIXME
 	}
 
@@ -940,9 +935,9 @@
 		case FORMAT_DXT1:				return 2;   // Column of four pixels
 		case FORMAT_DXT3:				return 4;   // Column of four pixels
 		case FORMAT_DXT5:				return 4;   // Column of four pixels
+		#endif
 		case FORMAT_ATI1:				return 2;   // Column of four pixels
 		case FORMAT_ATI2:				return 4;   // Column of four pixels
-		#endif
 		// Bumpmap formats
 		case FORMAT_V8U8:				return 2;
 		case FORMAT_L6V5U5:				return 2;
@@ -973,10 +968,10 @@
 		case FORMAT_D32F:				return 4;
 		case FORMAT_D32F_COMPLEMENTARY:	return 4;
 		case FORMAT_D32F_LOCKABLE:		return 4;
-		case FORMAT_D32F_TEXTURE:		return 4;
-		case FORMAT_D32F_SHADOW:		return 4;
-		case FORMAT_DF24:				return 4;
-		case FORMAT_DF16:				return 2;
+		case FORMAT_D32FS8_TEXTURE:		return 4;
+		case FORMAT_D32FS8_SHADOW:		return 4;
+		case FORMAT_DF24S8:				return 4;
+		case FORMAT_DF16S8:				return 2;
 		case FORMAT_INTZ:				return 4;
 		case FORMAT_S8:					return 1;
 		default:
@@ -1001,11 +996,11 @@
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
 			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
+		#endif
 		case FORMAT_ATI1:
 			return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
 		case FORMAT_ATI2:
 			return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
-		#endif
 		default:
 			return bytes(format) * width;
 		}
@@ -1032,9 +1027,9 @@
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
 			return pitchB(width, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
+		#endif
 		case FORMAT_ATI1:   // Pitch computed per row
 		case FORMAT_ATI2:   // Pitch computed per row
-		#endif
 		default:
 			return pitchB(width, format, target) * height;
 		}
@@ -1077,9 +1072,6 @@
 			default:				genericUpdate(destination, source);		break;
 			}
 		}
-
-		source.dirty = false;
-		destination.paletteUsed = Surface::paletteID;
 	}
 
 	void Surface::genericUpdate(Buffer &destination, Buffer &source)
@@ -1792,13 +1784,15 @@
 		{
 		#if S3TC_SUPPORT
 		case FORMAT_DXT1:
+		#endif
 		case FORMAT_ATI1:
 			return width4 * height4 * depth / 2;
+		#if S3TC_SUPPORT
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
+		#endif
 		case FORMAT_ATI2:
 			return width4 * height4 * depth;
-		#endif
 		default:
 			return bytes(format) * width * height * depth;
 		}
@@ -1820,10 +1814,10 @@
 		case FORMAT_D24S8:
 		case FORMAT_D24FS8:
 		case FORMAT_S8:
-		case FORMAT_DF24:
-		case FORMAT_DF16:
-		case FORMAT_D32F_TEXTURE:
-		case FORMAT_D32F_SHADOW:
+		case FORMAT_DF24S8:
+		case FORMAT_DF16S8:
+		case FORMAT_D32FS8_TEXTURE:
+		case FORMAT_D32FS8_SHADOW:
 		case FORMAT_INTZ:
 			return true;
 		default:
@@ -1843,10 +1837,10 @@
 		case FORMAT_D32F:
 		case FORMAT_D32F_COMPLEMENTARY:
 		case FORMAT_D32F_LOCKABLE:
-		case FORMAT_DF24:
-		case FORMAT_DF16:
-		case FORMAT_D32F_TEXTURE:
-		case FORMAT_D32F_SHADOW:
+		case FORMAT_DF24S8:
+		case FORMAT_DF16S8:
+		case FORMAT_D32FS8_TEXTURE:
+		case FORMAT_D32FS8_SHADOW:
 		case FORMAT_INTZ:
 			return true;
 		case FORMAT_S8:
@@ -1895,8 +1889,8 @@
 		case FORMAT_D32F:
 		case FORMAT_D32F_COMPLEMENTARY:
 		case FORMAT_D32F_LOCKABLE:
-		case FORMAT_D32F_TEXTURE:
-		case FORMAT_D32F_SHADOW:
+		case FORMAT_D32FS8_TEXTURE:
+		case FORMAT_D32FS8_SHADOW:
 			return true;
 		default:
 			ASSERT(false);
@@ -1918,8 +1912,8 @@
 		case FORMAT_D32F:
 		case FORMAT_D32F_COMPLEMENTARY:
 		case FORMAT_D32F_LOCKABLE:
-		case FORMAT_D32F_TEXTURE:
-		case FORMAT_D32F_SHADOW:
+		case FORMAT_D32FS8_TEXTURE:
+		case FORMAT_D32FS8_SHADOW:
 		case FORMAT_A8:
 		case FORMAT_R8:
 		case FORMAT_L8:
@@ -1996,9 +1990,9 @@
 		case FORMAT_DXT1:
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
+		#endif
 		case FORMAT_ATI1:
 		case FORMAT_ATI2:
-		#endif
 			return true;
 		default:
 			return false;
@@ -2032,10 +2026,10 @@
 		case FORMAT_DXT1:
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
+		#endif
 		case FORMAT_ATI1:
 		case FORMAT_ATI2:
 			return true;
-		#endif
 		default:
 			return false;
 		}
@@ -2060,8 +2054,8 @@
 		case FORMAT_G32R32F:		return 2;
 		case FORMAT_A32B32G32R32F:	return 4;
 		case FORMAT_D32F_LOCKABLE:	return 1;
-		case FORMAT_D32F_TEXTURE:	return 1;
-		case FORMAT_D32F_SHADOW:	return 1;
+		case FORMAT_D32FS8_TEXTURE:	return 1;
+		case FORMAT_D32FS8_SHADOW:	return 1;
 		case FORMAT_A8:				return 1;
 		case FORMAT_R8:				return 1;
 		case FORMAT_L8:				return 1;
@@ -2466,8 +2460,8 @@
 		int y1 = y0 + height;
 
 		if(internal.format == FORMAT_D32F_LOCKABLE ||
-		   internal.format == FORMAT_D32F_TEXTURE ||
-		   internal.format == FORMAT_D32F_SHADOW)
+		   internal.format == FORMAT_D32FS8_TEXTURE ||
+		   internal.format == FORMAT_D32FS8_SHADOW)
 		{
 			float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
 
@@ -2837,12 +2831,12 @@
 
 	bool Surface::identicalFormats() const
 	{
-		return external.format  == internal.format &&
-		       external.width   == internal.width &&
-			   external.height  == internal.height &&
-			   external.depth   == internal.depth &&
-			   external.pitchB  == internal.pitchB &&
-			   external.sliceB  == internal.sliceB;
+		return external.format == internal.format &&
+		       external.width  == internal.width &&
+			   external.height == internal.height &&
+			   external.depth  == internal.depth &&
+			   external.pitchB == internal.pitchB &&
+			   external.sliceB == internal.sliceB;
 	}
 
 	Format Surface::selectInternalFormat(Format format) const
@@ -2900,11 +2894,11 @@
 		case FORMAT_DXT3:
 		case FORMAT_DXT5:
 			return FORMAT_A8R8G8B8;
+		#endif
 		case FORMAT_ATI1:
 			return FORMAT_R8;
 		case FORMAT_ATI2:
 			return FORMAT_G8R8;
-		#endif
 		// Bumpmap formats
 		case FORMAT_V8U8:			return FORMAT_V8U8;
 		case FORMAT_L6V5U5:			return FORMAT_X8L8V8U8;
@@ -2933,7 +2927,7 @@
 		case FORMAT_D24FS8:
 			if(hasParent)   // Texture
 			{
-				return FORMAT_D32F_SHADOW;
+				return FORMAT_D32FS8_SHADOW;
 			}
 			else if(complementaryDepthBuffer)
 			{
@@ -2943,10 +2937,11 @@
 			{
 				return FORMAT_D32F;
 			}
-		case FORMAT_D32F_LOCKABLE:	return FORMAT_D32F_LOCKABLE;
-		case FORMAT_INTZ:			return FORMAT_D32F_TEXTURE;
-		case FORMAT_DF24:			return FORMAT_D32F_SHADOW;
-		case FORMAT_DF16:			return FORMAT_D32F_SHADOW;
+		case FORMAT_D32F_LOCKABLE:  return FORMAT_D32F_LOCKABLE;
+		case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
+		case FORMAT_INTZ:           return FORMAT_D32FS8_TEXTURE;
+		case FORMAT_DF24S8:         return FORMAT_D32FS8_SHADOW;
+		case FORMAT_DF16S8:         return FORMAT_D32FS8_SHADOW;
 		default:
 			ASSERT(false);
 		}
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp
index a50b1c3..130bd0b 100644
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer

 //

-// Copyright(c) 2005-2012 TransGaming Inc.

+// Copyright(c) 2005-2013 TransGaming Inc.

 //

 // All rights reserved. No part of this software may be copied, distributed, transmitted,

 // transcribed, stored in a retrieval system, translated into any human or computer

@@ -57,13 +57,11 @@
 		FORMAT_P8,

 		FORMAT_A8P8,

 		// Compressed formats

-		#if S3TC_SUPPORT

 		FORMAT_DXT1,

 		FORMAT_DXT3,

 		FORMAT_DXT5,

 		FORMAT_ATI1,

 		FORMAT_ATI2,

-		#endif

 		// Floating-point formats

 		FORMAT_R16F,

 		FORMAT_G16R16F,

@@ -94,10 +92,10 @@
 		FORMAT_D32F,                 // Quad layout

 		FORMAT_D32F_COMPLEMENTARY,   // Quad layout, 1 - z

 		FORMAT_D32F_LOCKABLE,        // Linear layout

-		FORMAT_D32F_TEXTURE,         // Linear layout, no PCF

-		FORMAT_D32F_SHADOW,          // Linear layout, PCF

-		FORMAT_DF24,

-		FORMAT_DF16,

+		FORMAT_D32FS8_TEXTURE,       // Linear layout, no PCF

+		FORMAT_D32FS8_SHADOW,        // Linear layout, PCF

+		FORMAT_DF24S8,

+		FORMAT_DF16S8,

 		FORMAT_INTZ,

 		FORMAT_S8,

 		// Quad layout framebuffer

@@ -147,7 +145,6 @@
 			Lock lock;

 			

 			bool dirty;

-			unsigned int paletteUsed;

 		};

 

 	public:

@@ -350,11 +347,12 @@
 		const bool lockable;

 		const bool renderTarget;

 

+		bool dirtyMipmaps;

+		unsigned int paletteUsed;

+

 		static unsigned int *palette;   // FIXME: Not multi-device safe

 		static unsigned int paletteID;

 

-		bool dirtyMipmaps;

-

 		sw::Resource *resource;

 		bool hasParent;

 	};

diff --git a/src/Renderer/TextureStage.cpp b/src/Renderer/TextureStage.cpp
index a2b3daf..a309ea4 100644
--- a/src/Renderer/TextureStage.cpp
+++ b/src/Renderer/TextureStage.cpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer
 //
-// Copyright(c) 2005-2011 TransGaming Inc.
+// Copyright(c) 2005-2012 TransGaming Inc.
 //
 // All rights reserved. No part of this software may be copied, distributed, transmitted,
 // transcribed, stored in a retrieval system, translated into any human or computer
@@ -14,6 +14,8 @@
 #include "Sampler.hpp"
 #include "Debug.hpp"
 
+#include <string.h>
+
 namespace sw
 {
 	TextureStage::State::State()
diff --git a/src/Renderer/TextureStage.hpp b/src/Renderer/TextureStage.hpp
index 270f949..a195263 100644
--- a/src/Renderer/TextureStage.hpp
+++ b/src/Renderer/TextureStage.hpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer

 //

-// Copyright(c) 2005-2011 TransGaming Inc.

+// Copyright(c) 2005-2012 TransGaming Inc.

 //

 // All rights reserved. No part of this software may be copied, distributed, transmitted,

 // transcribed, stored in a retrieval system, translated into any human or computer

@@ -24,8 +24,8 @@
 

 	class TextureStage

 	{

-		friend Context;        // FIXME

-		friend PixelRoutine;   // FIXME

+		friend class Context;        // FIXME

+		friend class PixelRoutine;   // FIXME

 

 	public:

 		enum StageOperation

diff --git a/src/Renderer/Vector.hpp b/src/Renderer/Vector.hpp
index 4c32d9e..2ff1713 100644
--- a/src/Renderer/Vector.hpp
+++ b/src/Renderer/Vector.hpp
@@ -1,150 +1,150 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef Vector_hpp

-#define Vector_hpp

-

-namespace sw

-{

-	struct Point;

-	struct Matrix;

-	struct Plane;

-

-	struct Vector

-	{

-		Vector();

-		Vector(const int i);

-		Vector(const Vector &v);

-		Vector(const Point &p);

-		Vector(float v_x, float v_y, float v_z);

-

-		Vector &operator=(const Vector &v);

-

-		union

-		{

-			float v[3];

-

-			struct

-			{

-				float x;

-				float y;

-				float z;

-			};

-		};

-

-		float &operator[](int i);

-		float &operator()(int i);

-

-		const float &operator[](int i) const;

-		const float &operator()(int i) const;

-

-		Vector operator+() const;

-		Vector operator-() const;

-

-		Vector &operator+=(const Vector &v);

-		Vector &operator-=(const Vector &v);

-		Vector &operator*=(float s);

-		Vector &operator/=(float s);

-

-		friend bool operator==(const Vector &u, const Vector &v);

-		friend bool operator!=(const Vector &u, const Vector &v);

-

-		friend Vector operator+(const Vector &u, const Vector &v);

-		friend Vector operator-(const Vector &u, const Vector &v);

-		friend float operator*(const Vector &u, const Vector &v);   // Dot product

-		friend Vector operator*(float s, const Vector &v);

-		friend Vector operator*(const Vector &v, float s);

-		friend Vector operator/(const Vector &v, float s);

-		friend float operator^(const Vector &u, const Vector &v);   // Angle between vectors

-		friend Vector operator%(const Vector &u, const Vector &v);   // Cross product

-

-		friend Vector operator*(const Matrix &M, const Vector& v);

-		friend Vector operator*(const Vector &v, const Matrix &M);

-		friend Vector &operator*=(Vector &v, const Matrix &M);

-

-		static float N(const Vector &v);   // Norm

-		static float N2(const Vector &v);   // Squared norm

-

-		static Vector mirror(const Vector &v, const Plane &p);

-		static Vector reflect(const Vector &v, const Plane &p);

-		static Vector lerp(const Vector &u, const Vector &v, float t);

-	};

-}

-

-#include "Point.hpp"

-

-namespace sw

-{

-	inline Vector::Vector()

-	{

-	}

-

-	inline Vector::Vector(const int i)

-	{

-		const float s = (float)i;

-

-		x = s;

-		y = s;

-		z = s;

-	}

-

-	inline Vector::Vector(const Vector &v)

-	{

-		x = v.x;

-		y = v.y;

-		z = v.z;

-	}

-

-	inline Vector::Vector(const Point &P)

-	{

-		x = P.x;

-		y = P.y;

-		z = P.z;

-	}

-

-	inline Vector::Vector(float v_x, float v_y, float v_z)

-	{

-		x = v_x;

-		y = v_y;

-		z = v_z;

-	}

-

-	inline Vector &Vector::operator=(const Vector &v)

-	{

-		x = v.x;

-		y = v.y;

-		z = v.z;

-

-		return *this;

-	}

-

-	inline float &Vector::operator()(int i)

-	{

-		return v[i];

-	}

-

-	inline float &Vector::operator[](int i)

-	{

-		return v[i];

-	}

-

-	inline const float &Vector::operator()(int i) const

-	{

-		return v[i];

-	}

-

-	inline const float &Vector::operator[](int i) const

-	{

-		return v[i];

-	}

-}

-

-#endif   // Vector_hpp

+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#ifndef Vector_hpp
+#define Vector_hpp
+
+namespace sw
+{
+	struct Point;
+	struct Matrix;
+	struct Plane;
+
+	struct Vector   // Three-component float vector
+	{
+		Vector();                  // Leaves the components uninitialized
+		Vector(const int i);       // Sets all three components to (float)i
+		Vector(const Vector &v);
+		Vector(const Point &p);    // Copies p.x, p.y and p.z
+		Vector(float v_x, float v_y, float v_z);
+
+		Vector &operator=(const Vector &v);
+
+		union   // The same three floats, reachable by index (v) or by name (x, y, z)
+		{
+			float v[3];
+
+			struct
+			{
+				float x;
+				float y;
+				float z;
+			};
+		};
+
+		float &operator[](int i);   // Returns v[i] without a bounds check
+		float &operator()(int i);   // Returns v[i] without a bounds check
+
+		const float &operator[](int i) const;
+		const float &operator()(int i) const;
+
+		Vector operator+() const;
+		Vector operator-() const;
+
+		Vector &operator+=(const Vector &v);
+		Vector &operator-=(const Vector &v);
+		Vector &operator*=(float s);
+		Vector &operator/=(float s);
+
+		friend bool operator==(const Vector &u, const Vector &v);
+		friend bool operator!=(const Vector &u, const Vector &v);
+
+		friend Vector operator+(const Vector &u, const Vector &v);
+		friend Vector operator-(const Vector &u, const Vector &v);
+		friend float operator*(const Vector &u, const Vector &v);   // Dot product
+		friend Vector operator*(float s, const Vector &v);
+		friend Vector operator*(const Vector &v, float s);
+		friend Vector operator/(const Vector &v, float s);
+		friend float operator^(const Vector &u, const Vector &v);   // Angle between vectors
+		friend Vector operator%(const Vector &u, const Vector &v);   // Cross product
+
+		friend Vector operator*(const Matrix &M, const Vector& v);
+		friend Vector operator*(const Vector &v, const Matrix &M);
+		friend Vector &operator*=(Vector &v, const Matrix &M);
+
+		static float N(const Vector &v);   // Norm
+		static float N2(const Vector &v);   // Squared norm
+
+		static Vector mirror(const Vector &v, const Plane &p);
+		static Vector reflect(const Vector &v, const Plane &p);
+		static Vector lerp(const Vector &u, const Vector &v, float t);
+	};
+}
+
+#include "Point.hpp"
+
+namespace sw
+{
+	inline Vector::Vector()   // Empty body: x, y and z are left uninitialized
+	{
+	}
+
+	inline Vector::Vector(const int i)
+	{
+		// Broadcast the integer, converted to float, into all three components
+		const float value = (float)i;
+		this->x = value;
+		this->y = value;
+		this->z = value;
+	}
+
+	inline Vector::Vector(const Vector &v)   // Component-wise copy of v
+	{
+		this->x = v.x;
+		this->y = v.y;
+		this->z = v.z;
+	}
+
+	inline Vector::Vector(const Point &P)   // Takes over the point's x, y and z unchanged
+	{
+		this->x = P.x;
+		this->y = P.y;
+		this->z = P.z;
+	}
+
+	inline Vector::Vector(float v_x, float v_y, float v_z)   // Builds the vector from its three components
+	{
+		this->x = v_x;
+		this->y = v_y;
+		this->z = v_z;
+	}
+
+	inline Vector &Vector::operator=(const Vector &v)
+	{
+		this->x = v.x;
+		this->y = v.y;
+		this->z = v.z;
+
+		return *this;   // Hand back the assignee so assignments can be chained
+	}
+
+	inline float &Vector::operator()(int i)   // Mutable reference to component i; no bounds check, v has 3 elements
+	{
+		return v[i];
+	}
+
+	inline float &Vector::operator[](int i)   // Mutable reference to component i; no bounds check, v has 3 elements
+	{
+		return v[i];
+	}
+
+	inline const float &Vector::operator()(int i) const   // Read-only reference to component i; no bounds check, v has 3 elements
+	{
+		return v[i];
+	}
+
+	inline const float &Vector::operator[](int i) const   // Read-only reference to component i; no bounds check, v has 3 elements
+	{
+		return v[i];
+	}
+}
+
+#endif   // Vector_hpp
diff --git a/src/Renderer/Vertex.hpp b/src/Renderer/Vertex.hpp
index 0b593a7..c34e276 100644
--- a/src/Renderer/Vertex.hpp
+++ b/src/Renderer/Vertex.hpp
@@ -1,6 +1,6 @@
 // SwiftShader Software Renderer

 //

-// Copyright(c) 2005-2011 TransGaming Inc.

+// Copyright(c) 2005-2012 TransGaming Inc.

 //

 // All rights reserved. No part of this software may be copied, distributed, transmitted,

 // transcribed, stored in a retrieval system, translated into any human or computer

@@ -70,7 +70,7 @@
 					};

 				};

 

-				Color<float> C[2];   // Diffuse and specular color

+				float4 C[2];   // Diffuse and specular color

 

 				UVWQ T[8];           // Texture coordinates

 

diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index c660ef6..ce8feda 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -19,10 +19,12 @@
 #include "Constants.hpp"
 #include "Debug.hpp"
 
-#include <malloc.h>
+#include <string.h>
 
 namespace sw
 {
+	bool precacheVertex = false;
+
 	void VertexCache::clear()
 	{
 		for(int i = 0; i < 16; i++)
@@ -97,7 +99,6 @@
 			updateModelMatrix[i] = true;
 		}
 
-		precacheDLL = 0;
 		routineCache = 0;
 		setRoutineCacheSize(1024);
 	}
@@ -113,46 +114,6 @@
 		context->input[index] = stream;
 	}
 
-	void VertexProcessor::setInputPositionStream(const Stream &stream)
-	{
-		context->input[Position] = stream;
-	}
-
-	void VertexProcessor::setInputBlendWeightStream(const Stream &stream)
-	{
-		context->input[BlendWeight] = stream;
-	}
-
-	void VertexProcessor::setInputBlendIndicesStream(const Stream &stream)
-	{
-		context->input[BlendIndices] = stream;
-	}
-
-	void VertexProcessor::setInputNormalStream(const Stream &stream)
-	{
-		context->input[Normal] = stream;
-	}
-
-	void VertexProcessor::setInputPSizeStream(const Stream &stream)
-	{
-		context->input[PSize] = stream;
-	}
-
-	void VertexProcessor::setInputTexCoordStream(const Stream &stream, int index)
-	{
-		context->input[TexCoord0 + index] = stream;
-	}
-
-	void VertexProcessor::setInputPositiontStream(const Stream &stream)
-	{
-		context->input[PositionT] = stream;
-	}
-
-	void VertexProcessor::setInputColorStream(const Stream &stream, int index)
-	{
-		context->input[Color0 + index] = stream;
-	}
-
 	void VertexProcessor::resetInputStreams(bool preTransformed)
 	{
 		for(int i = 0; i < 16; i++)
@@ -767,7 +728,7 @@
 	void VertexProcessor::setRoutineCacheSize(int cacheSize)
 	{
 		delete routineCache;
-		routineCache = new LRUCache<State, Routine>(clamp(cacheSize, 1, 65536));
+		routineCache = new RoutineCache<State>(clamp(cacheSize, 1, 65536), precacheVertex ? "sw-vertex" : 0);
 	}
 
 	const VertexProcessor::State VertexProcessor::update()
@@ -837,8 +798,8 @@
 		state.pointScaleActive = context->pointScaleActive();
 
 		state.preTransformed = context->preTransformed;
-		state.superSampling = context->renderTarget[0]->getSuperSampleCount() > 1;
-		state.multiSampling = context->renderTarget[0]->getMultiSampleCount() > 1;
+		state.superSampling = context->getSuperSampleCount() > 1;
+		state.multiSampling = context->getMultiSampleCount() > 1;
 
 		for(int i = 0; i < 16; i++)
 		{
@@ -930,7 +891,7 @@
 				}
 			}
 
-			if(context->input[PSize])
+			if(context->input[PointSize])
 			{
 				state.output[Pts].yWrite = true;
 			}
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index 5e3ff96..3def5ba 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -14,11 +14,10 @@
 

 #include "Matrix.hpp"

 #include "Context.hpp"

-#include "LRUCache.hpp"

+#include "RoutineCache.hpp"

 

 namespace sw

 {

-	class Routine;

 	struct DrawData;

 

 	struct VertexCache   // FIXME: Variable size

@@ -173,23 +172,13 @@
 			float pointScaleC;

 		};

 

-		typedef void (__cdecl *RoutinePointer)(Vertex *output, unsigned int *batch, VertexTask *vertexTask, DrawData *draw);

+		typedef void (*RoutinePointer)(Vertex *output, unsigned int *batch, VertexTask *vertexTask, DrawData *draw);

 

 		VertexProcessor(Context *context);

 

 		virtual ~VertexProcessor();

 

 		virtual void setInputStream(int index, const Stream &stream);

-

-		virtual void setInputPositionStream(const Stream &stream);

-		virtual void setInputBlendWeightStream(const Stream &stream);

-		virtual void setInputBlendIndicesStream(const Stream &stream);

-		virtual void setInputNormalStream(const Stream &stream);

-		virtual void setInputPSizeStream(const Stream &stream);

-		virtual void setInputTexCoordStream(const Stream &stream, int index);

-		virtual void setInputPositiontStream(const Stream &stream);

-		virtual void setInputColorStream(const Stream &stream, int index);

-

 		virtual void resetInputStreams(bool preTransformed);

 

 		virtual void setFloatConstant(unsigned int index, const float value[4]);

@@ -287,8 +276,7 @@
 

 		Context *const context;

 

-		LRUCache<State, Routine> *routineCache;

-		HMODULE precacheDLL;

+		RoutineCache<State> *routineCache;

 

 	protected:

 		Matrix M[12];      // Model/Geometry/World matrix

diff --git a/src/Renderer/Viewport.cpp b/src/Renderer/Viewport.cpp
deleted file mode 100644
index 975c699..0000000
--- a/src/Renderer/Viewport.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-// SwiftShader Software Renderer
-//
-// Copyright(c) 2005-2011 TransGaming Inc.
-//
-// All rights reserved. No part of this software may be copied, distributed, transmitted,
-// transcribed, stored in a retrieval system, translated into any human or computer
-// language by any means, or disclosed to third parties without the explicit written
-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
-// or implied, including but not limited to any patent rights, are granted to you.
-//
-
-#include "Viewport.hpp"
-
-#include "Math.hpp"
-
-namespace sw
-{
-	Viewport::Viewport()
-	{
-		width = 0;
-		height = 0;
-
-		left = 0;
-		top = 0;
-	}
-
-	Viewport::~Viewport()
-	{
-	}
-
-	void Viewport::setLeft(float l)
-	{
-		left = l;
-	}
-
-	void Viewport::setTop(float t)
-	{
-		top = t;
-	}
-
-	void Viewport::setWidth(float w)
-	{
-		width = w;
-	}
-
-	void Viewport::setHeight(float h)
-	{
-		height = h;
-	}
-
-	void Viewport::setNear(float n)
-	{
-		min = n;
-	}
-
-	void Viewport::setFar(float f)
-	{
-		max = f;
-	}
-
-	float Viewport::getLeft() const
-	{
-		return left;
-	}
-
-	float Viewport::getTop() const
-	{
-		return top;
-	}
-
-	float Viewport::getWidth() const
-	{
-		return width;
-	}
-
-	float Viewport::getHeight() const
-	{
-		return height;
-	}
-
-	float Viewport::getNear() const
-	{
-		return min;
-	}
-
-	float Viewport::getFar() const
-	{
-		return max;
-	}
-}
diff --git a/src/Renderer/Viewport.hpp b/src/Renderer/Viewport.hpp
deleted file mode 100644
index a946c69..0000000
--- a/src/Renderer/Viewport.hpp
+++ /dev/null
@@ -1,48 +0,0 @@
-// SwiftShader Software Renderer

-//

-// Copyright(c) 2005-2011 TransGaming Inc.

-//

-// All rights reserved. No part of this software may be copied, distributed, transmitted,

-// transcribed, stored in a retrieval system, translated into any human or computer

-// language by any means, or disclosed to third parties without the explicit written

-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express

-// or implied, including but not limited to any patent rights, are granted to you.

-//

-

-#ifndef sw_Viewport_hpp

-#define sw_Viewport_hpp

-

-namespace sw

-{

-	class Viewport

-	{

-	public:

-		Viewport();

-

-		~Viewport();

-

-		void setLeft(float l);

-		void setTop(float t);

-		void setWidth(float w);

-		void setHeight(float h);

-		void setNear(float n);

-		void setFar(float f);

-

-		float getLeft() const;

-		float getTop() const;

-		float getWidth() const;

-		float getHeight() const;

-		float getNear() const;

-		float getFar() const;

-

-	private:

-		float left;     // Leftmost pixel column

-		float top;      // Highest pixel row

-		float width;    // Width in pixels

-		float height;   // Height in pixels

-		float min;

-		float max;

-	};

-}

-

-#endif   // sw_Viewport_hpp