Added support for most formats to FP sampleTexel

This change is effectively a no-op by itself, but it allows the
floating-point path to read all texture formats (except YUV).
The floating-point path requires this in order to be able to
perform higher-precision filtering on all formats (except YUV).

Bug swiftshader:76

Change-Id: I5d0d24c00357b4b77cca2ca8a65d082db7635b40
Reviewed-on: https://swiftshader-review.googlesource.com/10668
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 0219b22..8c20f58 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1744,12 +1744,8 @@
 		}
 	}
 
-	void SamplerCore::sampleTexel(Vector4s &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+	void SamplerCore::sampleTexel(Vector4s &c, UInt index[4], Pointer<Byte> buffer[4])
 	{
-		UInt index[4];
-
-		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
-
 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
 		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
 		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
@@ -1924,7 +1920,16 @@
 				ASSERT(false);
 			}
 		}
-		else if(hasYuvFormat())
+		else ASSERT(false);
+	}
+
+	void SamplerCore::sampleTexel(Vector4s &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+	{
+		UInt index[4];
+
+		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
+
+		if(hasYuvFormat())
 		{
 			// Generic YPbPr to RGB transformation
 			// R = Y                               +           2 * (1 - Kr) * Pr
@@ -2018,7 +2023,10 @@
 			c.y = Min(g, UShort4(0x3FFF)) << 2;
 			c.z = Min(b, UShort4(0x3FFF)) << 2;
 		}
-		else ASSERT(false);
+		else
+		{
+			sampleTexel(c, index, buffer);
+		}
 	}
 
 	void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
@@ -2027,56 +2035,94 @@
 
 		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
 
-		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
-		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
-		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
-		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
-
-		// Read texels
-		switch(textureComponentCount())
+		if(hasFloatTexture())
 		{
-		case 4:
-			c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
-			c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
-			c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
-			c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
-			transpose4x4(c.x, c.y, c.z, c.w);
-			break;
-		case 3:
-			ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
-			c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
-			c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
-			c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
-			c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
-			transpose4x3(c.x, c.y, c.z, c.w);
-			c.w = Float4(1.0f);
-			break;
-		case 2:
-			// FIXME: Optimal shuffling?
-			c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
-			c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
-			c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
-			c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
-			c.y = c.x;
-			c.x = Float4(c.x.xz, c.z.xz);
-			c.y = Float4(c.y.yw, c.z.yw);
-			break;
-		case 1:
-			// FIXME: Optimal shuffling?
-			c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
-			c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
-			c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
-			c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
+			int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
+			int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
+			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
+			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
 
-			if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
+			// Read texels
+			switch(textureComponentCount())
 			{
-				Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
+			case 4:
+				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+				transpose4x4(c.x, c.y, c.z, c.w);
+				break;
+			case 3:
+				ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
+				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+				transpose4x3(c.x, c.y, c.z, c.w);
+				c.w = Float4(1.0f);
+				break;
+			case 2:
+				// FIXME: Optimal shuffling?
+				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
+				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
+				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
+				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
+				c.y = c.x;
+				c.x = Float4(c.x.xz, c.z.xz);
+				c.y = Float4(c.y.yw, c.z.yw);
+				break;
+			case 1:
+				// FIXME: Optimal shuffling?
+				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
+				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
+				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
+				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
 
-				c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f)));   // FIXME: Only less-equal?
+				if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
+				{
+					Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
+
+					c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f)));   // FIXME: Only less-equal?
+				}
+				break;
+			default:
+				ASSERT(false);
 			}
-			break;
-		default:
-			ASSERT(false);
+		}
+		else
+		{
+			ASSERT(!hasYuvFormat());
+
+			Vector4s cs;
+			sampleTexel(cs, index, buffer);
+
+			bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
+			int componentCount = textureComponentCount();
+			for(int n = 0; n < componentCount; ++n)
+			{
+				if(hasUnsignedTextureComponent(n))
+				{
+					if(isInteger)
+					{
+						c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
+					}
+					else
+					{
+						c[n] = Float4(As<UShort4>(cs[n]));
+					}
+				}
+				else
+				{
+					if(isInteger)
+					{
+						c[n] = As<Float4>(Int4(cs[n]));
+					}
+					else
+					{
+						c[n] = Float4(cs[n]);
+					}
+				}
+			}
 		}
 	}
 
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp
index 0f90cac..f4a9abd 100644
--- a/src/Shader/SamplerCore.hpp
+++ b/src/Shader/SamplerCore.hpp
@@ -76,6 +76,7 @@
 		Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
 		void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
 		void sampleTexel(Vector4s &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
+		void sampleTexel(Vector4s &c, UInt index[4], Pointer<Byte> buffer[4]);
 		void sampleTexel(Vector4f &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
 		void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
 		Short4 address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);