Depth-related fixes

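- Sampling a depth texture should write the depth value to the 1st (Red)
  channel only, i.e. return (D, 0, 0, 1) instead of (D, D, D, D)
- Depth values should be clamped to the [0, 1] range when loading depth
  image data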

Change-Id: Ic7c3ac09c86d5457ec3c59bf9666e2b168226c5e
Reviewed-on: https://swiftshader-review.googlesource.com/13988
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp
index df640c4..e26af7a 100644
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -58,6 +58,7 @@
 		D24,
 		D32,
 		D32F,
+		D32FS8,
 		S8,
 		S24_8,
 	};
@@ -377,13 +378,25 @@
 	template<>
 	void LoadImageRow<D32F>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)
 	{
+		const float *sourceF = reinterpret_cast<const float*>(source);
+		float *destF = reinterpret_cast<float*>(dest + xoffset * 4);
+
+		for(int x = 0; x < width; x++)
+		{
+			destF[x] = sw::clamp(sourceF[x], 0.0f, 1.0f);
+		}
+	}
+
+	template<>
+	void LoadImageRow<D32FS8>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)
+	{
 		struct D32FS8 { float depth32f; unsigned int stencil24_8; };
 		const D32FS8 *sourceD32FS8 = reinterpret_cast<const D32FS8*>(source);
 		float *destF = reinterpret_cast<float*>(dest + xoffset * 4);
 
 		for(int x = 0; x < width; x++)
 		{
-			destF[x] = sourceD32FS8[x].depth32f;
+			destF[x] = sw::clamp(sourceD32FS8[x].depth32f, 0.0f, 1.0f);
 		}
 	}
 
@@ -1538,7 +1551,7 @@
 						break;
 					case GL_DEPTH_COMPONENT:
 					case GL_DEPTH_COMPONENT32F:
-						LoadImageData<Bytes_4>(xoffset, yoffset, zoffset, width, height, depth, inputPitch, inputHeight, getPitch(), getHeight(), input, buffer);
+						LoadImageData<D32F>(xoffset, yoffset, zoffset, width, height, depth, inputPitch, inputHeight, getPitch(), getHeight(), input, buffer);
 						break;
 					default: UNREACHABLE(format);
 					}
@@ -1756,7 +1769,7 @@
 
 	void Image::loadD32FS8ImageData(GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, int inputPitch, int inputHeight, const void *input, void *buffer)
 	{
-		LoadImageData<D32F>(xoffset, yoffset, zoffset, width, height, depth, inputPitch, inputHeight, getPitch(), getHeight(), input, buffer);
+		LoadImageData<D32FS8>(xoffset, yoffset, zoffset, width, height, depth, inputPitch, inputHeight, getPitch(), getHeight(), input, buffer);
 
 		unsigned char *stencil = reinterpret_cast<unsigned char*>(lockStencil(0, 0, 0, sw::PUBLIC));
 
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 194e35a..c51dbf9 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -520,9 +520,9 @@
 				case FORMAT_D32F_LOCKABLE:
 				case FORMAT_D32FS8_TEXTURE:
 				case FORMAT_D32FS8_SHADOW:
-					c.y = c.x;
-					c.z = c.x;
-					c.w = c.x;
+					c.y = Float4(0.0f);
+					c.z = Float4(0.0f);
+					c.w = Float4(1.0f);
 					break;
 				default:
 					ASSERT(false);