Compute texture coordinates using floating-point operations

Modified the texture coordinate computation to use floating-point
operations when sampling floating-point textures, to increase
accuracy and remove unnecessary type conversions.

Bug swiftshader:76

Change-Id: I76e5fd9a68aa6ba5eb21524edb8c6c6183e5430e
Reviewed-on: https://swiftshader-review.googlesource.com/10628
Tested-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Renderer/Sampler.cpp b/src/Renderer/Sampler.cpp
index 7d7d47c..24734da 100644
--- a/src/Renderer/Sampler.cpp
+++ b/src/Renderer/Sampler.cpp
@@ -207,8 +207,15 @@
 				mipmap.onePitchP[2] = 1;
 				mipmap.onePitchP[3] = pitchP;
 
+				mipmap.pitchP[0] = pitchP;
+				mipmap.pitchP[1] = pitchP;
+				mipmap.pitchP[2] = pitchP;
+				mipmap.pitchP[3] = pitchP;
+
 				mipmap.sliceP[0] = sliceP;
 				mipmap.sliceP[1] = sliceP;
+				mipmap.sliceP[2] = sliceP;
+				mipmap.sliceP[3] = sliceP;
 
 				if(internalTextureFormat == FORMAT_YV12_BT601 ||
 				   internalTextureFormat == FORMAT_YV12_BT709 ||
diff --git a/src/Renderer/Sampler.hpp b/src/Renderer/Sampler.hpp
index 288f179..6fae602 100644
--- a/src/Renderer/Sampler.hpp
+++ b/src/Renderer/Sampler.hpp
@@ -36,7 +36,8 @@
 		short height[4];
 		short depth[4];
 		short onePitchP[4];
-		int sliceP[2];
+		int4 pitchP;
+		int4 sliceP;
 	};
 
 	struct Texture
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index f40b1dd..bf0ff25 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1256,15 +1256,24 @@
 
 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
 
-		bool texelFetch = (function == Fetch);
+		Int4 x0, x1, y0, y1, z0;
+		Float4 fu, fv;
+		Int4 filter = computeFilterOffset(lod);
+		address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
+		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
+		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
 
-		Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
-		Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
-		Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
-
-		if(state.textureFilter == FILTER_POINT || texelFetch)
+		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
+		y0 *= pitchP;
+		if(hasThirdCoordinate())
 		{
-			sampleTexel(c, uuuu, vvvv, wwww, offset, w, mipmap, buffer, function);
+			Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
+			z0 *= sliceP;
+		}
+
+		if(state.textureFilter == FILTER_POINT || (function == Fetch))
+		{
+			sampleTexel(c, x0, y0, z0, w, mipmap, buffer, function);
 		}
 		else
 		{
@@ -1273,22 +1282,15 @@
 			Vector4f c2;
 			Vector4f c3;
 
-			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
-			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
-			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
-			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
+			y1 *= pitchP;
 
-			sampleTexel(c0, uuuu0, vvvv0, wwww, offset, w, mipmap, buffer, function);
-			sampleTexel(c1, uuuu1, vvvv0, wwww, offset, w, mipmap, buffer, function);
-			sampleTexel(c2, uuuu0, vvvv1, wwww, offset, w, mipmap, buffer, function);
-			sampleTexel(c3, uuuu1, vvvv1, wwww, offset, w, mipmap, buffer, function);
+			sampleTexel(c0, x0, y0, z0, w, mipmap, buffer, function);
+			sampleTexel(c1, x1, y0, z0, w, mipmap, buffer, function);
+			sampleTexel(c2, x0, y1, z0, w, mipmap, buffer, function);
+			sampleTexel(c3, x1, y1, z0, w, mipmap, buffer, function);
 
 			if(!gather)   // Blend
 			{
-				// Fractions
-				Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth)));
-				Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight)));
-
 				if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
 				if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
 				if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
@@ -1324,15 +1326,21 @@
 
 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
 
-		bool texelFetch = (function == Fetch);
+		Int4 x0, x1, y0, y1, z0, z1;
+		Float4 fu, fv, fw;
+		Int4 filter = computeFilterOffset(lod);
+		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
+		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
+		address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
 
-		Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
-		Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
-		Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
+		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
+		Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
+		y0 *= pitchP;
+		z0 *= sliceP;
 
-		if(state.textureFilter == FILTER_POINT || texelFetch)
+		if(state.textureFilter == FILTER_POINT || (function == Fetch))
 		{
-			sampleTexel(c, uuuu, vvvv, wwww, offset, w, mipmap, buffer, function);
+			sampleTexel(c, x0, y0, z0, w, mipmap, buffer, function);
 		}
 		else
 		{
@@ -1345,26 +1353,17 @@
 			Vector4f c6;
 			Vector4f c7;
 
-			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
-			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
-			Short4 wwww0 = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, -1, lod);
-			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
-			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
-			Short4 wwww1 = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, +1, lod);
+			y1 *= pitchP;
+			z1 *= sliceP;
 
-			sampleTexel(c0, uuuu0, vvvv0, wwww0, offset, w, mipmap, buffer, function);
-			sampleTexel(c1, uuuu1, vvvv0, wwww0, offset, w, mipmap, buffer, function);
-			sampleTexel(c2, uuuu0, vvvv1, wwww0, offset, w, mipmap, buffer, function);
-			sampleTexel(c3, uuuu1, vvvv1, wwww0, offset, w, mipmap, buffer, function);
-			sampleTexel(c4, uuuu0, vvvv0, wwww1, offset, w, mipmap, buffer, function);
-			sampleTexel(c5, uuuu1, vvvv0, wwww1, offset, w, mipmap, buffer, function);
-			sampleTexel(c6, uuuu0, vvvv1, wwww1, offset, w, mipmap, buffer, function);
-			sampleTexel(c7, uuuu1, vvvv1, wwww1, offset, w, mipmap, buffer, function);
-
-			// Fractions
-			Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth)));
-			Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight)));
-			Float4 fw = Frac(Float4(As<UShort4>(wwww0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fDepth)));
+			sampleTexel(c0, x0, y0, z0, w, mipmap, buffer, function);
+			sampleTexel(c1, x1, y0, z0, w, mipmap, buffer, function);
+			sampleTexel(c2, x0, y1, z0, w, mipmap, buffer, function);
+			sampleTexel(c3, x1, y1, z0, w, mipmap, buffer, function);
+			sampleTexel(c4, x0, y0, z1, w, mipmap, buffer, function);
+			sampleTexel(c5, x1, y0, z1, w, mipmap, buffer, function);
+			sampleTexel(c6, x0, y1, z1, w, mipmap, buffer, function);
+			sampleTexel(c7, x1, y1, z1, w, mipmap, buffer, function);
 
 			// Blend first slice
 			if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
@@ -1701,7 +1700,7 @@
 		uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
 		uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
 
-		if((state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY))
+		if(hasThirdCoordinate())
 		{
 			if(state.textureType != TEXTURE_2D_ARRAY)
 			{
@@ -1731,7 +1730,7 @@
 		if(texelFetch)
 		{
 			Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
-			if((state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY))
+			if(hasThirdCoordinate())
 			{
 				size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
 			}
@@ -1745,6 +1744,21 @@
 		}
 	}
 
+	void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function)
+	{   // Sums the per-axis offsets into one texel index per lane; mipmap and function are not read in this overload
+		UInt4 indices = uuuu + vvvv;   // The float sampling paths in this patch multiply vvvv by pitchP before calling, so this is x + y * pitch
+
+		if(hasThirdCoordinate())
+		{
+			indices += As<UInt4>(wwww);   // Likewise, callers pre-multiply wwww by sliceP
+		}
+
+		for(int i = 0; i < 4; i++)
+		{
+			index[i] = Extract(As<Int4>(indices), i);   // Copy each vector lane into the scalar index array
+		}
+	}
+
 	void SamplerCore::sampleTexel(Vector4s &c, UInt index[4], Pointer<Byte> buffer[4])
 	{
 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
@@ -2030,11 +2044,11 @@
 		}
 	}
 
-	void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+	void SamplerCore::sampleTexel(Vector4f &c, Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
 	{
 		UInt index[4];
 
-		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
+		computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
 
 		if(hasFloatTexture())
 		{
@@ -2168,6 +2182,21 @@
 		}
 	}
 
+	Int4 SamplerCore::computeFilterOffset(Float &lod)
+	{   // Returns the per-lane step (0 or 1) that address() adds to xyz0 to obtain the second sample coordinate
+		Int4 filtering((state.textureFilter == FILTER_POINT) ? 0 : 1);   // Point filtering never needs a neighbouring texel
+		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
+		{
+			filtering &= CmpNLE(Float4(lod), Float4(0.0f));   // Keep the step only where !(lod <= 0) - presumably the minification case, confirm
+		}
+		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
+		{
+			filtering &= CmpLE(Float4(lod), Float4(0.0f));   // Keep the step only where lod <= 0 - presumably the magnification case, confirm
+		}
+
+		return filtering;
+	}
+
 	Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap)
 	{
 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
@@ -2210,6 +2239,119 @@
 		}
 	}
 
+	void SamplerCore::address(Float4 &uvw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
+	{   // Maps one axis of a texture coordinate to integer texel coordinates: xyz0 on every used path, xyz1 only when filtering, f only when blending (not gather)
+		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
+		{
+			return; // Unused axis: xyz0, xyz1 and f are left untouched
+		}
+
+		Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));   // whd is the byte offset of a Mipmap dimension field (callers pass OFFSET(Mipmap, width/height/depth))
+		Int4 maxXYZ = dim - Int4(1);   // Largest valid texel coordinate along this axis
+
+		if(function == Fetch)
+		{
+			xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);   // uvw is read via As<Int4> (presumably a bit-cast of integer coordinates), optionally offset, then clamped to [0, dim - 1]
+		}
+		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers
+		{
+			xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);   // Layer index: convert to integer and clamp to [0, dim - 1]; xyz1 and f are not written
+		}
+		else
+		{
+			const int halfBits = 0x3effffff; // Value just under 0.5f
+			const int oneBits  = 0x3f7fffff; // Value just under 1.0f
+			const int twoBits  = 0x3fffffff; // Value just under 2.0f
+
+			Float4 coord = Float4(dim);   // Multiplied below by the wrapped coordinate to give a position in texel units
+			switch(addressingMode)
+			{
+			case ADDRESSING_CLAMP:
+				{
+					Float4 one = As<Float4>(Int4(oneBits));
+					coord *= Min(Max(uvw, Float4(0.0f)), one);   // Clamp to [0, just under 1]
+				}
+				break;
+			case ADDRESSING_MIRROR:
+				{
+					Float4 half = As<Float4>(Int4(halfBits));
+					Float4 one = As<Float4>(Int4(oneBits));
+					Float4 two = As<Float4>(Int4(twoBits));
+					coord *= one - Abs(two * Frac(uvw * half) - one);   // Folds uvw back and forth with a period of (approximately) 2
+				}
+				break;
+			case ADDRESSING_MIRRORONCE:
+				{
+					Float4 half = As<Float4>(Int4(halfBits));
+					Float4 one = As<Float4>(Int4(oneBits));
+					Float4 two = As<Float4>(Int4(twoBits));
+					coord *= one - Abs(two * Frac(Min(Max(uvw, -one), two) * half) - one);   // Same fold as ADDRESSING_MIRROR, applied after clamping uvw to [-one, two]
+				}
+				break;
+			default:   // Wrap (or border)
+				coord *= Frac(uvw);   // Keep only the fractional part of uvw
+				break;
+			}
+
+			xyz0 = Int4(coord);   // Integer texel coordinate of the first sample
+
+			if(function.option == Offset)
+			{
+				xyz0 += As<Int4>(texOffset);   // texOffset is read via As<Int4>, i.e. treated as integer texel offsets
+				switch(addressingMode)
+				{
+				case ADDRESSING_MIRROR:
+				case ADDRESSING_MIRRORONCE:
+				case ADDRESSING_BORDER:
+					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
+				case ADDRESSING_CLAMP:
+					xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
+					break;
+				default:   // Wrap
+					xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;   // Adds a multiple of dim before the modulo; presumably MIN_PROGRAM_TEXEL_OFFSET is the most negative offset, keeping the operand non-negative - confirm
+					break;
+				}
+			}
+
+			if(state.textureFilter != FILTER_POINT) // Compute 2nd coordinate, if needed
+			{
+				bool gather = state.textureFilter == FILTER_GATHER;
+
+				xyz1 = xyz0 + filter; // Increment by the 0/1 step; NOTE(review): the 2D caller passes z0 as both xyz0 and xyz1, so this would alias if that axis reached here - confirm it cannot
+
+				if(!gather)
+				{
+					Float4 frac = Frac(coord);   // Position within the texel
+					f = Abs(frac - Float4(0.5f));   // Distance from the texel centre; callers use it as the weight in c0 + f * (c1 - c0)
+					xyz1 -= CmpLT(frac, Float4(0.5f)) & (filter + filter); // Decrement xyz if necessary: in the lower half of the texel the neighbour becomes xyz0 - filter
+				}
+
+				switch(addressingMode)
+				{
+				case ADDRESSING_MIRROR:
+				case ADDRESSING_MIRRORONCE:
+				case ADDRESSING_BORDER:
+					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
+				case ADDRESSING_CLAMP:
+					xyz1 = gather ? Min(xyz1, maxXYZ) : Min(Max(xyz1, Int4(0)), maxXYZ);   // Gather never decrements xyz1, so only the upper bound is clamped
+					break;
+				default:   // Wrap
+					{
+						// The coordinates overflow or underflow by at most 1
+						Int4 over = CmpNLT(xyz1, dim);   // Mask of lanes where xyz1 >= dim
+						xyz1 = (over & Int4(0)) | (~over & xyz1); // xyz >= dim ? 0 : xyz
+						if(!gather)
+						{
+							Int4 under = CmpLT(xyz1, Int4(0));   // Mask of lanes where xyz1 < 0
+							xyz1 = (under & maxXYZ) | (~under & xyz1); // xyz < 0 ? dim - 1 : xyz
+						}
+					}
+					break;
+				}
+			}
+		}
+	}
+
 	void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
 	{
 		cs = RoundShort4(cf * Float4(0x1000));
@@ -2297,6 +2439,11 @@
 		return Surface::componentCount(state.textureFormat);
 	}
 
+	bool SamplerCore::hasThirdCoordinate() const
+	{   // True for 3D and 2D-array textures, the types for which a slice term is added to the texel index
+		return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY);
+	}
+
 	bool SamplerCore::has16bitTextureFormat() const
 	{
 		switch(state.textureFormat)
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp
index f84e4f9..62c0116 100644
--- a/src/Shader/SamplerCore.hpp
+++ b/src/Shader/SamplerCore.hpp
@@ -76,11 +76,15 @@
 		void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z);
 		Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
 		void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
+		void computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function);
 		void sampleTexel(Vector4s &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
-		void sampleTexel(Vector4s &c, UInt index[4], Pointer<Byte> buffer[4]);
 		void sampleTexel(Vector4f &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
+		void sampleTexel(Vector4s &c, UInt index[4], Pointer<Byte> buffer[4]);
+		void sampleTexel(Vector4f &c, Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
 		void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
 		Short4 address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);
+		void address(Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
+		Int4 computeFilterOffset(Float &lod);
 
 		void convertFixed12(Short4 &ci, Float4 &cf);
 		void convertFixed12(Vector4s &cs, Vector4f &cf);
@@ -94,6 +98,7 @@
 		bool hasFloatTexture() const;
 		bool hasUnsignedTextureComponent(int component) const;
 		int textureComponentCount() const;
+		bool hasThirdCoordinate() const;
 		bool has16bitTextureFormat() const;
 		bool has8bitTextureComponents() const;
 		bool has16bitTextureComponents() const;