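Don't use Long1 for stencil operations.

Load the four 8-bit stencil values directly as a Byte8 instead of reading
a UInt and widening it through Long1. The load now reads 8 bytes, which
the 4 extra bytes already allocated at the end of each surface cover.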

Bug swiftshader:15

Change-Id: I4fa5356109e35ac13f9f8d5a97e9059262901051
Reviewed-on: https://swiftshader-review.googlesource.com/7950
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-on: https://swiftshader-review.googlesource.com/8156
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 02ad43b..25e64e8 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -3051,6 +3051,7 @@
 		int height2 = (height + 1) & ~1;
 
 		// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
+		// and stencil operations also read 8 bytes per four 8-bit stencil values,
 		// so we have to allocate 4 extra bytes to avoid buffer overruns.
 		return allocateZero(size(width2, height2, depth, format) + 4);
 	}
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index 2f2b43d..2025da7 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -321,7 +321,7 @@
 			buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
 		}
 
-		Byte8 value = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
+		Byte8 value = *Pointer<Byte8>(buffer);
 		Byte8 valueCCW = value;
 
 		if(!state.noStencilMask)
@@ -763,7 +763,7 @@
 			buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
 		}
 
-		Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
+		Byte8 bufferValue = *Pointer<Byte8>(buffer);
 
 		Byte8 newValue;
 		stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);