Refactor sampling functions to use a return value.

Change-Id: Ib62f310abecbc4cdaf6e9300791600f25af0eaf3
Reviewed-on: https://swiftshader-review.googlesource.com/13550
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/PixelPipeline.cpp b/src/Shader/PixelPipeline.cpp
index 15a12c2..91535ad 100644
--- a/src/Shader/PixelPipeline.cpp
+++ b/src/Shader/PixelPipeline.cpp
@@ -49,7 +49,7 @@
 
 			if(state.textureStage[stage].usesTexture)
 			{
-				sampleTexture(texture, stage, stage);
+				texture = sampleTexture(stage, stage);
 			}
 
 			blendTexture(temp, texture, stage);
@@ -1207,7 +1207,7 @@
 		current.z = AddSat(current.z, specular.z);
 	}
 
-	void PixelPipeline::sampleTexture(Vector4s &c, int coordinates, int stage, bool project)
+	Vector4s PixelPipeline::sampleTexture(int coordinates, int stage, bool project)
 	{
 		Float4 x = v[2 + coordinates].x;
 		Float4 y = v[2 + coordinates].y;
@@ -1222,11 +1222,13 @@
 			perturbate = false;
 		}
 
-		sampleTexture(c, stage, x, y, z, w, project);
+		return sampleTexture(stage, x, y, z, w, project);
 	}
 
-	void PixelPipeline::sampleTexture(Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
+	Vector4s PixelPipeline::sampleTexture(int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
 	{
+		Vector4s c;
+
 		#if PERF_PROFILE
 			Long texTime = Ticks();
 		#endif
@@ -1238,7 +1240,7 @@
 
 		if(!project)
 		{
-			SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, c, u, v, w, q, dsx, dsy);
+			c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, dsx, dsy);
 		}
 		else
 		{
@@ -1248,12 +1250,14 @@
 			Float4 v_q = v * rq;
 			Float4 w_q = w * rq;
 
-			SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy);
+			c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, dsx, dsy);
 		}
 
 		#if PERF_PROFILE
 			cycles[PERF_TEX] += Ticks() - texTime;
 		#endif
+
+		return c;
 	}
 
 	Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
@@ -1470,22 +1474,18 @@
 	{
 		// FIXME: Long fixed-point multiply fixup
 		{ dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
-		{
-		dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
-	}
-		{dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
-		{dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
+		{ dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); }
+		{ dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
+		{ dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
 	}
 
 	void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
 	{
 		// FIXME: Long fixed-point multiply fixup
 		{ dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
-		{
-		dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);
-	}
-		{dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
-		{dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
+		{ dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); }
+		{ dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
+		{ dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
 	}
 
 	void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
@@ -1647,7 +1647,7 @@
 		v_ = Float4(0.0f);
 		w_ = Float4(0.0f);
 
-		sampleTexture(dst, stage, u_, v_, w_, w_);
+		dst = sampleTexture(stage, u_, v_, w_, w_);
 	}
 
 	void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
@@ -1675,7 +1675,7 @@
 
 	void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
 	{
-		sampleTexture(dst, sampler, u, v, s, s, project);
+		dst = sampleTexture(sampler, u, v, s, s, project);
 	}
 
 	void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
@@ -1684,7 +1684,7 @@
 		Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
 		Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
 
-		sampleTexture(dst, sampler, u, v, s, s, project);
+		dst = sampleTexture(sampler, u, v, s, s, project);
 	}
 
 	void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
@@ -1705,7 +1705,7 @@
 		Float4 u_ = u + du;
 		Float4 v_ = v + dv;
 
-		sampleTexture(dst, stage, u_, v_, s, s);
+		dst = sampleTexture(stage, u_, v_, s, s);
 	}
 
 	void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
@@ -1726,7 +1726,7 @@
 		Float4 u_ = u + du;
 		Float4 v_ = v + dv;
 
-		sampleTexture(dst, stage, u_, v_, s, s);
+		dst = sampleTexture(stage, u_, v_, s, s);
 
 		Short4 L;
 
@@ -1748,7 +1748,7 @@
 		Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
 		Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
 
-		sampleTexture(dst, stage, u, v, s, s);
+		dst = sampleTexture(stage, u, v, s, s);
 	}
 
 	void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
@@ -1757,7 +1757,7 @@
 		Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
 		Float4 s = v;
 
-		sampleTexture(dst, stage, u, v, s, s);
+		dst = sampleTexture(stage, u, v, s, s);
 	}
 
 	void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
@@ -1766,7 +1766,7 @@
 		Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
 		Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
 
-		sampleTexture(dst, stage, u, v, s, s);
+		dst = sampleTexture(stage, u, v, s, s);
 	}
 
 	void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
@@ -1790,7 +1790,7 @@
 
 		w_ = Float4(0.0f);
 
-		sampleTexture(dst, stage, u_, v_, w_, w_);
+		dst = sampleTexture(stage, u_, v_, w_, w_);
 	}
 
 	void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
@@ -1861,14 +1861,14 @@
 		v__ -= E[1] * u_;
 		w__ -= E[2] * u_;
 
-		sampleTexture(dst, stage, u__, v__, w__, w__);
+		dst = sampleTexture(stage, u__, v__, w__, w__);
 	}
 
 	void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
 	{
 		TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
 
-		sampleTexture(dst, stage, u_, v_, w_, w_);
+		dst = sampleTexture(stage, u_, v_, w_, w_);
 	}
 
 	void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
@@ -1905,7 +1905,7 @@
 		v__ -= E[1] * u_;
 		w__ -= E[2] * u_;
 
-		sampleTexture(dst, stage, u__, v__, w__, w__);
+		dst = sampleTexture(stage, u__, v__, w__, w__);
 	}
 
 	void PixelPipeline::TEXDEPTH()
diff --git a/src/Shader/PixelPipeline.hpp b/src/Shader/PixelPipeline.hpp
index d8d2a03..66f0ec7 100644
--- a/src/Shader/PixelPipeline.hpp
+++ b/src/Shader/PixelPipeline.hpp
@@ -59,8 +59,8 @@
 		void fogBlend(Vector4s &current, Float4 &fog);
 		void specularPixel(Vector4s &current, Vector4s &specular);
 
-		void sampleTexture(Vector4s &c, int coordinates, int sampler, bool project = false);
-		void sampleTexture(Vector4s &c, int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project = false);
+		Vector4s sampleTexture(int coordinates, int sampler, bool project = false);
+		Vector4s sampleTexture(int sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project = false);
 
 		Short4 convertFixed12(RValue<Float4> cf);
 		void convertFixed12(Vector4s &cs, Vector4f &cf);
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index f4ee8a9..87ed7c8 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -678,13 +678,13 @@
 		}
 	}
 
-	void PixelProgram::sampleTexture(Vector4f &c, const Src &sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
+	Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
 	{
 		Vector4f tmp;
 
 		if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
 		{
-			sampleTexture(tmp, sampler.index, uvwq, dsx, dsy, offset, function);
+			tmp = sampleTexture(sampler.index, uvwq, dsx, dsy, offset, function);
 		}
 		else
 		{
@@ -696,31 +696,36 @@
 				{
 					If(index == i)
 					{
-						sampleTexture(tmp, i, uvwq, dsx, dsy, offset, function);
+						tmp = sampleTexture(i, uvwq, dsx, dsy, offset, function);
 						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
 					}
 				}
 			}
 		}
 
+		Vector4f c;
 		c.x = tmp[(sampler.swizzle >> 0) & 0x3];
 		c.y = tmp[(sampler.swizzle >> 2) & 0x3];
 		c.z = tmp[(sampler.swizzle >> 4) & 0x3];
 		c.w = tmp[(sampler.swizzle >> 6) & 0x3];
+
+		return c;
 	}
 
-	void PixelProgram::sampleTexture(Vector4f &c, int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
+	Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
 	{
 		#if PERF_PROFILE
 			Long texTime = Ticks();
 		#endif
 
 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture);
-		SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, c, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
+		Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
 
 		#if PERF_PROFILE
 			cycles[PERF_TEX] += Ticks() - texTime;
 		#endif
+
+		return c;
 	}
 
 	void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
@@ -1118,58 +1123,58 @@
 			proj.y = src0.y * rw;
 			proj.z = src0.z * rw;
 
-			sampleTexture(dst, src1, proj, src0, src0, src0, Implicit);
+			dst = sampleTexture(src1, proj, src0, src0, src0, Implicit);
 		}
 		else
 		{
-			sampleTexture(dst, src1, src0, src0, src0, src0, bias ? Bias : Implicit);
+			dst = sampleTexture(src1, src0, src0, src0, src0, bias ? Bias : Implicit);
 		}
 	}
 
 	void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, bool bias)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, src2, {bias ? Bias : Implicit, Offset});
+		dst = sampleTexture(src1, src0, src0, src0, src2, {bias ? Bias : Implicit, Offset});
 	}
 
 	void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, bool bias)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, offset, {Lod, Offset});
+		dst = sampleTexture(src1, src0, src0, src0, offset, {Lod, Offset});
 	}
 
 	void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, src0, Fetch);
+		dst = sampleTexture(src1, src0, src0, src0, src0, Fetch);
 	}
 
 	void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, offset, {Fetch, Offset});
+		dst = sampleTexture(src1, src0, src0, src0, offset, {Fetch, Offset});
 	}
 
 	void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
 	{
-		sampleTexture(dst, src1, src0, src2, src3, src0, Grad);
+		dst = sampleTexture(src1, src0, src2, src3, src0, Grad);
 	}
 
 	void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)
 	{
-		sampleTexture(dst, src1, src0, src2, src3, offset, {Grad, Offset});
+		dst = sampleTexture(src1, src0, src2, src3, offset, {Grad, Offset});
 	}
 
 	void PixelProgram::TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3)
 	{
-		sampleTexture(dst, src1, src0, src2, src3, src0, Grad);
+		dst = sampleTexture(src1, src0, src2, src3, src0, Grad);
 	}
 
 	void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, src0, Lod);
+		dst = sampleTexture(src1, src0, src0, src0, src0, Lod);
 	}
 
 	void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
 	{
 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture);
-		SamplerCore::textureSize(texture, dst, lod);
+		dst = SamplerCore::textureSize(texture, lod);
 	}
 
 	void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp
index b15177a..a76a8ba 100644
--- a/src/Shader/PixelProgram.hpp
+++ b/src/Shader/PixelProgram.hpp
@@ -82,8 +82,8 @@
 		Int4 enableContinue;
 		Int4 enableLeave;
 
-		void sampleTexture(Vector4f &c, const Src &sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
-		void sampleTexture(Vector4f &c, int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
+		Vector4f sampleTexture(const Src &sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
+		Vector4f sampleTexture(int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
 
 		// Raster operations
 		void clampColor(Vector4f oC[RENDERTARGETS]);
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 86c08fa..ec71463 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -56,13 +56,15 @@
 	{
 	}
 
-	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy)
+	Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy)
 	{
-		sampleTexture(texture, c, u, v, w, q, dsx, dsy, dsx, Implicit, true);
+		return sampleTexture(texture, u, v, w, q, dsx, dsy, dsx, Implicit, true);
 	}
 
-	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12)
+	Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12)
 	{
+		Vector4s c;
+
 		#if PERF_PROFILE
 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
 
@@ -127,13 +129,11 @@
 
 			if(!hasFloatTexture())
 			{
-				sampleFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
+				c = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
 			}
 			else
 			{
-				Vector4f cf;
-
-				sampleFloatFilter(texture, cf, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
+				Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
 
 				convertFixed12(c, cf);
 			}
@@ -295,10 +295,14 @@
 			applySwizzle(state.swizzleB, c.z, col);
 			applySwizzle(state.swizzleA, c.w, col);
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
+	Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
 	{
+		Vector4f c;
+
 		#if PERF_PROFILE
 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
 
@@ -357,7 +361,7 @@
 					computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function);
 				}
 
-				sampleFloatFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
+				c = sampleFloatFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
 
 				if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture())
 				{
@@ -385,9 +389,7 @@
 			}
 			else
 			{
-				Vector4s cs;
-
-				sampleTexture(texture, cs, u, v, w, q, dsx, dsy, offset, function, false);
+				Vector4s cs = sampleTexture(texture, u, v, w, q, dsx, dsy, offset, function, false);
 
 				if(has16bitTextureFormat())
 				{
@@ -553,10 +555,14 @@
 			applySwizzle(state.swizzleB, c.z, col);
 			applySwizzle(state.swizzleA, c.w, col);
 		}
+
+		return c;
 	}
 
-	void SamplerCore::textureSize(Pointer<Byte> &texture, Vector4f &size, Float4 &lod)
+	Vector4f SamplerCore::textureSize(Pointer<Byte> &texture, Float4 &lod)
 	{
+		Vector4f size;
+
 		for(int i = 0; i < 4; ++i)
 		{
 			Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel));
@@ -565,6 +571,8 @@
 			size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
 			size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
 		}
+
+		return size;
 	}
 
 	void SamplerCore::border(Short4 &mask, Float4 &coordinates)
@@ -615,20 +623,18 @@
 		return uvw;
 	}
 
-	void SamplerCore::sampleFilter(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
+	Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
 	{
-		sampleAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
+		Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
 
 		if(function == Fetch)
 		{
-			return;
+			return c;
 		}
 
 		if(state.mipmapFilter > MIPMAP_POINT)
 		{
-			Vector4s cc;
-
-			sampleAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
+			Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
 
 			lod *= Float(1 << 16);
 
@@ -714,13 +720,17 @@
 			c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)));
 			c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1)));
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sampleAniso(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
 	{
+		Vector4s c;
+
 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
 		{
-			sampleQuad(texture, c, u, v, w, offset, lod, face, secondLOD, function);
+			c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function);
 		}
 		else
 		{
@@ -751,7 +761,7 @@
 
 			Do
 			{
-				sampleQuad(texture, c, u0, v0, w, offset, lod, face, secondLOD, function);
+				c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function);
 
 				u0 += du;
 				v0 += dv;
@@ -770,22 +780,26 @@
 			if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
 			if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sampleQuad(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
 	{
 		if(state.textureType != TEXTURE_3D)
 		{
-			sampleQuad2D(texture, c, u, v, w, offset, lod, face, secondLOD, function);
+			return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function);
 		}
 		else
 		{
-			sample3D(texture, c, u, v, w, offset, lod, secondLOD, function);
+			return sample3D(texture, u, v, w, offset, lod, secondLOD, function);
 		}
 	}
 
-	void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
 	{
+		Vector4s c;
+
 		int componentCount = textureComponentCount();
 		bool gather = state.textureFilter == FILTER_GATHER;
 
@@ -973,10 +987,14 @@
 				c.w = c0.x;
 			}
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sample3D(Pointer<Byte> &texture, Vector4s &c_, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
 	{
+		Vector4s c_;
+
 		int componentCount = textureComponentCount();
 
 		Pointer<Byte> mipmap;
@@ -1094,22 +1112,22 @@
 			if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
 			if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
 		}
+
+		return c_;
 	}
 
-	void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
+	Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
 	{
-		sampleFloatAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
+		Vector4f c = sampleFloatAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
 
 		if(function == Fetch)
 		{
-			return;
+			return c;
 		}
 
 		if(state.mipmapFilter > MIPMAP_POINT)
 		{
-			Vector4f cc;
-
-			sampleFloatAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
+			Vector4f cc = sampleFloatAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
 
 			Float4 lod4 = Float4(Frac(lod));
 
@@ -1174,13 +1192,17 @@
 			c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))));
 			c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))));
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
 	{
+		Vector4f c;
+
 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
 		{
-			sampleFloat(texture, c, u, v, w, offset, lod, face, secondLOD, function);
+			c = sampleFloat(texture, u, v, w, offset, lod, face, secondLOD, function);
 		}
 		else
 		{
@@ -1209,7 +1231,7 @@
 
 			Do
 			{
-				sampleFloat(texture, c, u0, v0, w, offset, lod, face, secondLOD, function);
+				c = sampleFloat(texture, u0, v0, w, offset, lod, face, secondLOD, function);
 
 				u0 += du;
 				v0 += dv;
@@ -1228,22 +1250,26 @@
 			c.z = cSum.z;
 			c.w = cSum.w;
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
 	{
 		if(state.textureType != TEXTURE_3D)
 		{
-			sampleFloat2D(texture, c, u, v, w, offset, lod, face, secondLOD, function);
+			return sampleFloat2D(texture, u, v, w, offset, lod, face, secondLOD, function);
 		}
 		else
 		{
-			sampleFloat3D(texture, c, u, v, w, offset, lod, secondLOD, function);
+			return sampleFloat3D(texture, u, v, w, offset, lod, secondLOD, function);
 		}
 	}
 
-	void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
 	{
+		Vector4f c;
+
 		int componentCount = textureComponentCount();
 		bool gather = state.textureFilter == FILTER_GATHER;
 
@@ -1305,10 +1331,14 @@
 				c.w = c0.x;
 			}
 		}
+
+		return c;
 	}
 
-	void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
 	{
+		Vector4f c;
+
 		int componentCount = textureComponentCount();
 
 		Pointer<Byte> mipmap;
@@ -1385,6 +1415,8 @@
 			if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
 			if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
 		}
+
+		return c;
 	}
 
 	Float SamplerCore::log2sqrt(Float lod)
@@ -1639,7 +1671,7 @@
 		Int4 tmp = Int4(As<UShort4>(uvw));
 		tmp = tmp + As<Int4>(offset);
 
-		switch (mode)
+		switch(mode)
 		{
 		case AddressingMode::ADDRESSING_WRAP:
 			tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd;
@@ -1690,13 +1722,16 @@
 				{
 					wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
 				}
+
 				if(hasOffset)
 				{
 					wwww = applyOffset(wwww, offset.z, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW);
 				}
 			}
+
 			UInt4 uv(As<UInt2>(uuuu), As<UInt2>(uuu2));
 			uv += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP));
+
 			index[0] = Extract(As<Int4>(uv), 0);
 			index[1] = Extract(As<Int4>(uv), 1);
 			index[2] = Extract(As<Int4>(uv), 2);
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp
index 085b2d3..43ed1d1 100644
--- a/src/Shader/SamplerCore.hpp
+++ b/src/Shader/SamplerCore.hpp
@@ -49,26 +49,26 @@
 	public:
 		SamplerCore(Pointer<Byte> &constants, const Sampler::State &state);
 
-		void sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy);
-		void sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
-		static void textureSize(Pointer<Byte> &mipmap, Vector4f &size, Float4 &lod);
+		Vector4s sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy);
+		Vector4f sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
+		static Vector4f textureSize(Pointer<Byte> &mipmap, Float4 &lod);
 
 	private:
-		void sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12);
+		Vector4s sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12);
 
 		void border(Short4 &mask, Float4 &coordinates);
 		void border(Int4 &mask, Float4 &coordinates);
 		Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
-		void sampleFilter(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
-		void sampleAniso(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
-		void sampleQuad(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
-		void sampleQuad2D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
-		void sample3D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
-		void sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
-		void sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
-		void sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
-		void sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
-		void sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
+		Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
+		Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
+		Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
 		Float log2sqrt(Float lod);
 		void computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
 		void computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &x, Float4 &y, Float4 &z, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
@@ -78,7 +78,6 @@
 		void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
 		void computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function);
 		Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
-		Vector4f sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
 		Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer[4]);
 		Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
 		void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 9db65ed..ca3149a 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -1558,60 +1558,59 @@
 
 	void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)
 	{
-		sampleTexture(dst, src1, src0, a0, a0, src0, Lod);
+		dst = sampleTexture(src1, src0, a0, a0, src0, Lod);
 	}
 
 	void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)
 	{
 		src0.w = Float(0.0f);
-		sampleTexture(dst, src1, src0, a0, a0, src0, Lod);
+		dst = sampleTexture(src1, src0, a0, a0, src0, Lod);
 	}
 
 	void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)
 	{
 		src0.w = Float(0.0f);
-		sampleTexture(dst, src1, src0, a0, a0, src2, {Lod, Offset});
+		dst = sampleTexture(src1, src0, a0, a0, src2, {Lod, Offset});
 	}
 
 	void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)
 	{
-		sampleTexture(dst, src1, src0, a0, a0, offset, {Lod, Offset});
+		dst = sampleTexture(src1, src0, a0, a0, offset, {Lod, Offset});
 	}
 
 	void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, src0, Fetch);
+		dst = sampleTexture(src1, src0, src0, src0, src0, Fetch);
 	}
 
 	void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)
 	{
-		sampleTexture(dst, src1, src0, src0, src0, offset, {Fetch, Offset});
+		dst = sampleTexture(src1, src0, src0, src0, offset, {Fetch, Offset});
 	}
 
 	void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
 	{
-		sampleTexture(dst, src1, src0, src2, src3, src0, Grad);
+		dst = sampleTexture(src1, src0, src2, src3, src0, Grad);
 	}
 
 	void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)
 	{
-		sampleTexture(dst, src1, src0, src2, src3, offset, {Grad, Offset});
+		dst = sampleTexture(src1, src0, src2, src3, offset, {Grad, Offset});
 	}
 
 	void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
 	{
 		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture);
-		SamplerCore::textureSize(texture, dst, lod);
+		dst = SamplerCore::textureSize(texture, lod);
 	}
 
-	void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
+	Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
 	{
 		Vector4f tmp;
 
 		if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
 		{
-			Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + s.index * sizeof(Texture);
-			SamplerCore(constants, state.sampler[s.index]).sampleTexture(texture, tmp, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
+			tmp = sampleTexture(s.index, uvwq, dsx, dsy, offset, function);
 		}
 		else
 		{
@@ -1623,17 +1622,25 @@
 				{
 					If(index == i)
 					{
-						Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + i * sizeof(Texture);
-						SamplerCore(constants, state.sampler[i]).sampleTexture(texture, tmp, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
+						tmp = sampleTexture(i, uvwq, dsx, dsy, offset, function);
 						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
 					}
 				}
 			}
 		}
 
+		Vector4f c;
 		c.x = tmp[(s.swizzle >> 0) & 0x3];
 		c.y = tmp[(s.swizzle >> 2) & 0x3];
 		c.z = tmp[(s.swizzle >> 4) & 0x3];
 		c.w = tmp[(s.swizzle >> 6) & 0x3];
+
+		return c;
+	}
+
+	Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
+	{
+		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + sampler * sizeof(Texture);
+		return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
 	}
 }
diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp
index 54c15ff..5e70392 100644
--- a/src/Shader/VertexProgram.hpp
+++ b/src/Shader/VertexProgram.hpp
@@ -117,7 +117,8 @@
 		void TEXGRAD(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2, Vector4f &src3, Vector4f &src4);
 		void TEXSIZE(Vector4f &dst, Float4 &lod, const Src&);
 
-		void sampleTexture(Vector4f &c, const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
+		Vector4f sampleTexture(const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
+		Vector4f sampleTexture(int sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
 
 		int ifDepth;
 		int loopRepDepth;