Revert "VK_EXT_shader_stencil_export support"

This reverts commit f9881ba0b4b795229c8bd847eb84c859178b2e76.

Reason for revert: Causes failures on the Chromium optional GPU test try builders (Linux, Mac and Windows). See:
https://ci.chromium.org/p/chromium/builders/try/linux_optional_gpu_tests_rel/36449
https://ci.chromium.org/p/chromium/builders/try/mac_optional_gpu_tests_rel/36978
https://ci.chromium.org/p/chromium/builders/try/win_optional_gpu_tests_rel/37221

Change-Id: Ib95459cefea22001fa43e07bc20a15002976a9da
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41468
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 728fe83..8543a5e 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -750,26 +750,6 @@
 	}
 }
 
-Byte8 PixelRoutine::stencilReplaceRef(bool isBack)
-{
-	auto it = spirvShader->outputBuiltins.find(spv::BuiltInFragStencilRefEXT);
-	if(it != spirvShader->outputBuiltins.end())
-	{
-		UInt4 sRef = As<UInt4>(routine.getVariable(it->second.Id)[it->second.FirstComponent]) & UInt4(0xff);
-		// TODO (b/148295813): Could be done with a single pshufb instruction. Optimize the
-		//                     following line by either adding a rr::Shuffle() variant to do
-		//                     it explicitly or adding a Byte4(Int4) constructor would work.
-		sRef.x = rr::UInt(sRef.x) | (rr::UInt(sRef.y) << 8) | (rr::UInt(sRef.z) << 16) | (rr::UInt(sRef.w) << 24);
-
-		UInt2 sRefDuplicated;
-		sRefDuplicated = Insert(sRefDuplicated, sRef.x, 0);
-		sRefDuplicated = Insert(sRefDuplicated, sRef.x, 1);
-		return As<Byte8>(sRefDuplicated);
-	}
-
-	return *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
-}
-
 void PixelRoutine::stencilOperation(Byte8 &output, const Byte8 &bufferValue, VkStencilOp operation, bool isBack)
 {
 	switch(operation)
@@ -781,7 +761,7 @@
 			output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 			break;
 		case VK_STENCIL_OP_REPLACE:
-			output = stencilReplaceRef(isBack);
+			output = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
 			break;
 		case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
 			output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 72d4253..34722ad 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -65,7 +65,6 @@
 
 private:
 	Float4 interpolateCentroid(const Float4 &x, const Float4 &y, const Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
-	Byte8 stencilReplaceRef(bool isBack);
 	void stencilTest(const Pointer<Byte> &sBuffer, int q, const Int &x, Int &sMask, const Int &cMask);
 	void stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack);
 	void stencilOperation(Byte8 &newValue, const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask);
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 4e50f2d..5026373 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -381,7 +381,6 @@
 					case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
 					case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
 					case spv::CapabilityMultiView: capabilities.MultiView = true; break;
-					case spv::CapabilityStencilExportEXT: capabilities.StencilExportEXT = true; break;
 					default:
 						UNSUPPORTED("Unsupported capability %u", insn.word(1));
 				}
@@ -720,7 +719,6 @@
 				if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
 				if(!strcmp(ext, "SPV_KHR_device_group")) break;
 				if(!strcmp(ext, "SPV_KHR_multiview")) break;
-				if(!strcmp(ext, "SPV_EXT_shader_stencil_export")) break;
 				UNSUPPORTED("SPIR-V Extension: %s", ext);
 				break;
 			}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index e9876c7..406fe97 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -544,7 +544,6 @@
 		bool GroupNonUniformArithmetic : 1;
 		bool DeviceGroup : 1;
 		bool MultiView : 1;
-		bool StencilExportEXT : 1;
 	};
 
 	Capabilities const &getUsedCapabilities() const
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 810874b..5690cfa 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1915,7 +1915,7 @@
 					Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
 					Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
 					Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
-					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
+					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 
 					switch(state.textureFormat)
 					{
@@ -2067,7 +2067,7 @@
 			Int c1 = Int(buffer[0][index[1]]);
 			Int c2 = Int(buffer[0][index[2]]);
 			Int c3 = Int(buffer[0][index[3]]);
-			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
+			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
 
 			computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
@@ -2075,14 +2075,14 @@
 			c1 = Int(buffer[1][index[1]]);
 			c2 = Int(buffer[1][index[2]]);
 			c3 = Int(buffer[1][index[3]]);
-			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
+			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
 
 			c0 = Int(buffer[2][index[0]]);
 			c1 = Int(buffer[2][index[1]]);
 			c2 = Int(buffer[2][index[2]]);
 			c3 = Int(buffer[2][index[3]]);
-			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
+			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
 
 			const UShort4 yY = UShort4(iround(Yy * 0x4000));
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index 937f482..92a123a 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -321,8 +321,6 @@
 	{ VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME, VK_EXT_QUEUE_FAMILY_FOREIGN_SPEC_VERSION },
 	// The following extension is only used to add support for Bresenham lines
 	{ VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, VK_EXT_LINE_RASTERIZATION_SPEC_VERSION },
-	// The following extension is used by ANGLE to emulate blitting the stencil buffer
-	{ VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION },
 #ifndef __ANDROID__
 	// We fully support the KHR_swapchain v70 additions, so just track the spec version.
 	{ VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_SWAPCHAIN_SPEC_VERSION },