Add support for spv::BuiltInSampleMask output

The mask output from the shader is ANDed with the coverage. One wrinkle:
the shader output gives us a 4-bit mask of samples in each lane, but we need
a 4-bit mask of lane coverage per sample, so the mask is transposed.

Test: dEQP-VK.renderpass.*.multisample.*
Bug: b/118386749
Change-Id: Id4d60bef71cd179915db424aa94a9d9728fb42cd
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30552
Tested-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 6189931..79e79e2 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -93,6 +93,17 @@
 			}
 		}
 
+		it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
+		if (it != spirvShader->outputBuiltins.end())
+		{
+			auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
+
+			for (auto i = 0u; i < state.multiSample; i++)
+			{
+				cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1<<i), SIMD::Int(0)));
+			}
+		}
+
 		it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
 		if (it != spirvShader->outputBuiltins.end())
 		{