Remove quadLayout

Because stencil buffers can now be sampled as textures, used
in cubemaps, blitted, etc., the quadLayout used for stencil
buffers requires continual texture coordinate conversion in
order to perform various tasks involving stencil buffers.
For now, the simplest solution is to remove the quadLayout
used by the stencil buffer.

Fixes all issues in the following subcategory using SwANGLE:
dEQP-GLES31.functional.stencil_texturing.format.*_stencil8_cube

Bug: b/144353295
Change-Id: I2a8cbdda0036146f9dca222ec330841788a3ee8b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38188
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 0f81b6b..e06601b 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -1345,18 +1345,9 @@
 		}
 	}
 
-	Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
+	Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
 	{
-		if(!quadLayout)
-		{
-			return y * pitchB + x * bytes;
-		}
-		else
-		{
-			// (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
-			return (y & Int(~1)) * pitchB +
-			       ((((y & Int(1)) + x) << 1) - (x & Int(1))) * bytes;
-		}
+		return y * pitchB + x * bytes;
 	}
 
 	Float4 Blitter::LinearToSRGB(Float4 &c)
@@ -1410,8 +1401,6 @@
 			bool intSrc = state.sourceFormat.isNonNormalizedInteger();
 			bool intDst = state.destFormat.isNonNormalizedInteger();
 			bool intBoth = intSrc && intDst;
-			bool srcQuadLayout = state.sourceFormat.hasQuadLayout();
-			bool dstQuadLayout = state.destFormat.hasQuadLayout();
 			int srcBytes = state.sourceFormat.bytes();
 			int dstBytes = state.destFormat.bytes();
 
@@ -1438,12 +1427,12 @@
 			For(Int j = y0d, j < y1d, j++)
 			{
 				Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
-				Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
+				Pointer<Byte> destLine = dest + j * dPitchB;
 
 				For(Int i = x0d, i < x1d, i++)
 				{
 					Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
-					Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
+					Pointer<Byte> d = destLine + i * dstBytes;
 
 					if(hasConstantColorI)
 					{
@@ -1474,7 +1463,7 @@
 							Y = Clamp(Y, 0, sHeight - 1);
 						}
 
-						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
+						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
 
 						// When both formats are true integer types, we don't go to float to avoid losing precision
 						Int4 color = readInt4(s, state);
@@ -1501,7 +1490,7 @@
 								Y = Clamp(Y, 0, sHeight - 1);
 							}
 
-							Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
+							Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
 
 							color = readFloat4(s, state);
 
@@ -1550,10 +1539,10 @@
 							X1 = IfThenElse(X1 >= sWidth, X0, X1);
 							Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
 
-							Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
-							Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
-							Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
-							Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
+							Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes);
+							Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes);
+							Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes);
+							Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes);
 
 							Float4 c00 = readFloat4(s00, state);
 							Float4 c01 = readFloat4(s01, state);
@@ -1626,8 +1615,7 @@
 	{
 		auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
 		auto format = src->getFormat(aspect);
-		State state(format, format.getNonQuadLayoutFormat(), VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
-					Options{false, false});
+		State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{false, false});
 
 		auto blitRoutine = getBlitRoutine(state);
 		if(!blitRoutine)
@@ -1690,8 +1678,7 @@
 	{
 		auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
 		auto format = dst->getFormat(aspect);
-		State state(format.getNonQuadLayoutFormat(), format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
-					Options{false, false});
+		State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{false, false});
 
 		auto blitRoutine = getBlitRoutine(state);
 		if(!blitRoutine)
@@ -1891,15 +1878,14 @@
 	void Blitter::computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state)
 	{
 		int bytes = state.sourceFormat.bytes();
-		bool quadLayout = state.sourceFormat.hasQuadLayout();
 
-		Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes, quadLayout), state) +
-		           readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes, quadLayout), state) +
-		           readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes, quadLayout), state);
+		Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
+		           readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
+		           readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
 
 		c *= Float4(1.0f / 3.0f);
 
-		write(c, layer + ComputeOffset(x0, y0, pitchB, bytes, quadLayout), state);
+		write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
 	}
 
 	Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State& state)
diff --git a/src/Device/Blitter.hpp b/src/Device/Blitter.hpp
index 7ed8dd4..0157e88 100644
--- a/src/Device/Blitter.hpp
+++ b/src/Device/Blitter.hpp
@@ -132,7 +132,7 @@
 		Int4 readInt4(Pointer<Byte> element, const State &state);
 		void write(Int4 &color, Pointer<Byte> element, const State &state);
 		static void ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled = false);
-		static Int ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout);
+		static Int ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes);
 		static Float4 LinearToSRGB(Float4 &color);
 		static Float4 sRGBtoLinear(Float4 &color);
 
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 1c67a7c..6908aa0 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -249,14 +249,16 @@
 
 		// (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
 
-		Pointer<Byte> buffer = sBuffer + 2 * x;
+		Pointer<Byte> buffer = sBuffer + x;
 
 		if(q > 0)
 		{
 			buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
 		}
 
-		Byte8 value = *Pointer<Byte8>(buffer);
+		Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
+		Byte8 value = *Pointer<Byte8>(buffer) & Byte8(-1, -1, 0, 0, 0, 0, 0, 0);
+		value = value | (*Pointer<Byte8>(buffer + pitch - 2) & Byte8(0, 0, -1, -1, 0, 0, 0, 0));
 		Byte8 valueBack = value;
 
 		if(state.frontStencil.compareMask != 0xff)
@@ -624,15 +626,16 @@
 			return;
 		}
 
-		Pointer<Byte> buffer = sBuffer + 2 * x;
+		Pointer<Byte> buffer = sBuffer + x;
 
 		if(q > 0)
 		{
 			buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
 		}
 
-		Byte8 bufferValue = *Pointer<Byte8>(buffer);
-
+		Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
+		Byte8 bufferValue = *Pointer<Byte8>(buffer) & Byte8(-1, -1, 0, 0, 0, 0, 0, 0);
+		bufferValue = bufferValue | (*Pointer<Byte8>(buffer + pitch - 2) & Byte8(0, 0, -1, -1, 0, 0, 0, 0));
 		Byte8 newValue;
 		stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask);
 
@@ -664,7 +667,8 @@
 		bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
 		newValue |= bufferValue;
 
-		*Pointer<Byte4>(buffer) = Byte4(newValue);
+		*Pointer<Short>(buffer) = Extract(As<Short4>(newValue), 0);
+		*Pointer<Short>(buffer + pitch) = Extract(As<Short4>(newValue), 1);
 	}
 
 	void PixelRoutine::stencilOperation(Byte8 &newValue, const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask)
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index 62e3cef..afed517 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -45,13 +45,6 @@
 		default: ASSERT(false);
 		}
 	}
-
-	template <typename T>
-	void applyQuadLayout(T& x, T& y)
-	{
-		x = (((y & T(1)) + x) << 1) - (x & T(1));
-		y &= T(~1);
-	}
 }
 
 namespace sw
@@ -902,11 +895,6 @@
 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
 		address(w, z0, z0, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
 
-		if(hasQuadLayout())
-		{
-			::applyQuadLayout(x0, y0);
-		}
-
 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
 		y0 *= pitchP;
 		if(state.addressingModeW != ADDRESSING_UNUSED)
@@ -920,11 +908,6 @@
 		}
 		else
 		{
-			if(hasQuadLayout())
-			{
-				::applyQuadLayout(x1, y1);
-			}
-
 			y1 *= pitchP;
 
 			Vector4f c00 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
@@ -988,11 +971,6 @@
 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
 		address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
 
-		if(hasQuadLayout())
-		{
-			::applyQuadLayout(x0, y0);
-		}
-
 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
 		Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
 		y0 *= pitchP;
@@ -1004,11 +982,6 @@
 		}
 		else
 		{
-			if(hasQuadLayout())
-			{
-				::applyQuadLayout(x1, y1);
-			}
-
 			y1 *= pitchP;
 			z1 *= sliceP;
 
@@ -1307,11 +1280,6 @@
 			                   texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
 		}
 
-		if(hasQuadLayout())
-		{
-			::applyQuadLayout(uuuu, vvvv);
-		}
-
 		Short4 uuu2 = uuuu;
 		uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
 		uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
@@ -2419,11 +2387,6 @@
 		return state.textureFormat.has32bitIntegerTextureComponents();
 	}
 
-	bool SamplerCore::hasQuadLayout() const
-	{
-		return state.textureFormat.hasQuadLayout();
-	}
-
 	bool SamplerCore::isYcbcrFormat() const
 	{
 		return state.textureFormat.isYcbcrFormat();
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index e5ab898..1c8d95e 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -105,7 +105,6 @@
 		bool has8bitTextureComponents() const;
 		bool has16bitTextureComponents() const;
 		bool has32bitIntegerTextureComponents() const;
-		bool hasQuadLayout() const;
 		bool isYcbcrFormat() const;
 		bool isRGBComponent(int component) const;
 		bool borderModeActive() const;
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index db1dcdf..e77da84 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -5379,14 +5379,6 @@
 			v += routine->windowSpacePosition[1];
 		}
 
-		if (useStencilAspect)
-		{
-			// Adjust addressing for quad layout. Pitches are already correct for the stencil aspect.
-			// In the quad-layout block, pixel order is [x0,y0   x1,y0   x0,y1   x1,y1]
-			u = ((v & SIMD::Int(1)) << 1) | ((u << 1) - (u & SIMD::Int(1)));
-			v &= SIMD::Int(~1);
-		}
-
 		auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
 															  ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
 															  : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
diff --git a/src/Vulkan/VkFormat.cpp b/src/Vulkan/VkFormat.cpp
index 9489401..8e07ea4 100644
--- a/src/Vulkan/VkFormat.cpp
+++ b/src/Vulkan/VkFormat.cpp
@@ -292,28 +292,6 @@
 	}
 }
 
-bool Format::hasQuadLayout() const
-{
-	switch(format)
-	{
-	case VK_FORMAT_S8_UINT:
-		return true;
-	default:
-		return false;
-	}
-}
-
-VkFormat Format::getNonQuadLayoutFormat() const
-{
-	switch(format)
-	{
-	case VK_FORMAT_S8_UINT:
-		return VK_FORMAT_R8_UINT;
-	default:
-		return format;
-	}
-}
-
 bool Format::isSRGBformat() const
 {
 	switch(format)
diff --git a/src/Vulkan/VkFormat.h b/src/Vulkan/VkFormat.h
index 0825009..77b9ad5 100644
--- a/src/Vulkan/VkFormat.h
+++ b/src/Vulkan/VkFormat.h
@@ -42,8 +42,6 @@
 	Format getAspectFormat(VkImageAspectFlags aspect) const;
 	bool isStencil() const;
 	bool isDepth() const;
-	bool hasQuadLayout() const;
-	VkFormat getNonQuadLayoutFormat() const;
 	bool isSRGBformat() const;
 	bool isFloatFormat() const;
 	bool isYcbcrFormat() const;
diff --git a/src/Vulkan/VkImage.cpp b/src/Vulkan/VkImage.cpp
index 6caa670..0b7224f 100644
--- a/src/Vulkan/VkImage.cpp
+++ b/src/Vulkan/VkImage.cpp
@@ -219,10 +219,9 @@
 	Format srcFormat = getFormat(srcAspect);
 	Format dstFormat = dstImage->getFormat(dstAspect);
 
-	if(((samples > VK_SAMPLE_COUNT_1_BIT) && (imageType == VK_IMAGE_TYPE_2D) && !format.isNonNormalizedInteger()) ||
-		srcFormat.hasQuadLayout() || dstFormat.hasQuadLayout())
+	if((samples > VK_SAMPLE_COUNT_1_BIT) && (imageType == VK_IMAGE_TYPE_2D) && !format.isNonNormalizedInteger())
 	{
-		// Requires multisampling resolve, or quadlayout awareness
+		// Requires multisampling resolve
 		VkImageBlit region;
 		region.srcSubresource = pRegion.srcSubresource;
 		region.srcOffsets[0] = pRegion.srcOffset;
@@ -354,23 +353,6 @@
 	int bufferSlicePitchBytes = bufferExtent.height * bufferRowPitchBytes;
 
 	uint8_t* bufferMemory = static_cast<uint8_t*>(buffer->getOffsetPointer(region.bufferOffset));
-
-	if (copyFormat.hasQuadLayout())
-	{
-		if (bufferIsSource)
-		{
-			return device->getBlitter()->blitFromBuffer(this, region.imageSubresource, region.imageOffset,
-														region.imageExtent, bufferMemory, bufferRowPitchBytes,
-														bufferSlicePitchBytes);
-		}
-		else
-		{
-			return device->getBlitter()->blitToBuffer(this, region.imageSubresource, region.imageOffset,
-													  region.imageExtent, bufferMemory, bufferRowPitchBytes,
-													  bufferSlicePitchBytes);
-		}
-	}
-
 	uint8_t* imageMemory = static_cast<uint8_t*>(getTexelPointer(region.imageOffset, region.imageSubresource));
 	uint8_t* srcMemory = bufferIsSource ? bufferMemory : imageMemory;
 	uint8_t* dstMemory = bufferIsSource ? imageMemory : bufferMemory;