Only process active fragment samples This change refactors the handling of which samples we should iterate over, by putting their indices into a container object. This enables the use of range-based for loops to have a more elegant syntax and avoid the confusion around sampleId -1 having special meaning and the begin and end iteration markers still requiring checking the sample mask on each loop. Bug: b/194521425 Change-Id: Ib6fbbb3e89c3a5501311ebd81859608df44d1bd0 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/56008 Kokoro-Result: kokoro <noreply+kokoro@google.com> Reviewed-by: Sean Risser <srisser@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/QuadRasterizer.cpp b/src/Device/QuadRasterizer.cpp index c56d4da..6c5397c 100644 --- a/src/Device/QuadRasterizer.cpp +++ b/src/Device/QuadRasterizer.cpp
@@ -198,10 +198,6 @@ Short4 mask = CmpGT(xxxx, xLeft[i]) & CmpGT(xRight[i], xxxx); cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F; } - else - { - cMask[q] = 0; - } } quad(cBuffer, zBuffer, sBuffer, cMask, x, y);
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp index cf815a4..809a03c 100644 --- a/src/Pipeline/PixelProgram.cpp +++ b/src/Pipeline/PixelProgram.cpp
@@ -13,22 +13,31 @@ // limitations under the License. #include "PixelProgram.hpp" -#include "Constants.hpp" +#include "Constants.hpp" #include "SamplerCore.hpp" #include "Device/Primitive.hpp" #include "Device/Renderer.hpp" namespace sw { +PixelProgram::PixelProgram( + const PixelProcessor::State &state, + const vk::PipelineLayout *pipelineLayout, + const SpirvShader *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets) + : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets) +{ +} + // Union all cMask and return it as 4 booleans -Int4 PixelProgram::maskAny(Int cMask[4]) const +Int4 PixelProgram::maskAny(Int cMask[4], const SampleSet &samples) { // See if at least 1 sample is used - Int maskUnion = cMask[0]; - for(auto i = 1u; i < state.multiSampleCount; i++) + Int maskUnion = 0; + for(unsigned int q : samples) { - maskUnion |= cMask[i]; + maskUnion |= cMask[q]; } // Convert to 4 booleans @@ -40,13 +49,13 @@ } // Union all cMask/sMask/zMask and return it as 4 booleans -Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const +Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples) { // See if at least 1 sample is used - Int maskUnion = cMask[0] & sMask[0] & zMask[0]; - for(auto i = 1u; i < state.multiSampleCount; i++) + Int maskUnion = 0; + for(unsigned int q : samples) { - maskUnion |= (cMask[i] & sMask[i] & zMask[i]); + maskUnion |= (cMask[q] & sMask[q] & zMask[q]); } // Convert to 4 booleans @@ -57,19 +66,7 @@ return mask; } -Int4 PixelProgram::maskAny(Int cMask, Int sMask, Int zMask) const -{ - Int maskUnion = cMask & sMask & zMask; - - // Convert to 4 booleans - Int4 laneBits = Int4(1, 2, 4, 8); - Int4 laneShiftsToMSB = Int4(31, 30, 29, 28); - Int4 mask(maskUnion); - mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31); - return mask; -} - -void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], int sampleId) +void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples) { routine.setImmutableInputBuiltins(spirvShader); @@ -79,20 +76,25 @@ float y0 = 0.5f; float x1 = 1.5f; float y1 = 1.5f; - if((state.multiSampleCount > 1) && (sampleId >= 0)) + + // "When Sample Shading is enabled, the x and y components of FragCoord reflect the + // location of one of the samples corresponding to the shader invocation. Otherwise, + // the x and y components of FragCoord reflect the location of the center of the fragment." + if(state.sampleShadingEnabled && state.multiSampleCount > 1) { - x0 = Constants::VkSampleLocations4[sampleId][0]; - y0 = Constants::VkSampleLocations4[sampleId][1]; + x0 = Constants::VkSampleLocations4[samples[0]][0]; + y0 = Constants::VkSampleLocations4[samples[0]][1]; x1 = 1.0f + x0; y1 = 1.0f + y0; } + routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(x0, x1, x0, x1); routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(y0, y0, y1, y1); routine.fragCoord[2] = z[0]; // sample 0 routine.fragCoord[3] = w; routine.invocationsPerSubgroup = SIMD::Width; - routine.helperInvocation = ~maskAny(cMask); + routine.helperInvocation = ~maskAny(cMask, samples); routine.windowSpacePosition[0] = x + SIMD::Int(0, 1, 0, 1); routine.windowSpacePosition[1] = y + SIMD::Int(0, 0, 1, 1); routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID)); @@ -133,11 +135,8 @@ }); } -void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4], int sampleId) +void PixelProgram::executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples) { - unsigned int sampleLoopInit = (sampleId >= 0) ? sampleId : 0; - unsigned int sampleLoopEnd = (sampleId >= 0) ? sampleId + 1 : state.multiSampleCount; - routine.descriptorSets = data + OFFSET(DrawData, descriptorSets); routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets); routine.pushConstants = data + OFFSET(DrawData, pushConstants); @@ -158,43 +157,51 @@ Int4 laneBits = Int4(1, 2, 4, 8); Int4 inputSampleMask = 0; - for(auto i = sampleLoopInit; i < sampleLoopEnd; i++) + for(unsigned int q : samples) { - inputSampleMask |= Int4(1 << i) & CmpNEQ(Int4(cMask[i]) & laneBits, Int4(0)); + inputSampleMask |= Int4(1 << q) & CmpNEQ(Int4(cMask[q]) & laneBits, Int4(0)); } routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(inputSampleMask); // Sample mask input is an array, as the spec contemplates MSAA levels higher than 32. // Fill any non-zero indices with 0. for(auto i = 1u; i < it->second.SizeInComponents; i++) + { routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = Float4(0); + } } it = spirvShader->inputBuiltins.find(spv::BuiltInSampleId); if(it != spirvShader->inputBuiltins.end()) { + ASSERT(samples.size() == 1); + int sampleId = samples[0]; routine.getVariable(it->second.Id)[it->second.FirstComponent] = - As<SIMD::Float>(SIMD::Int((sampleId >= 0) ? sampleId : 0)); + As<SIMD::Float>(SIMD::Int(sampleId)); } it = spirvShader->inputBuiltins.find(spv::BuiltInSamplePosition); if(it != spirvShader->inputBuiltins.end()) { + ASSERT(samples.size() == 1); + int sampleId = samples[0]; routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] = - SIMD::Float(((sampleId >= 0) && (state.multiSampleCount > 1)) ? Constants::VkSampleLocations4[sampleId][0] : 0.5f); + SIMD::Float((state.multiSampleCount > 1) ? Constants::VkSampleLocations4[sampleId][0] : 0.5f); routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] = - SIMD::Float(((sampleId >= 0) && (state.multiSampleCount > 1)) ? Constants::VkSampleLocations4[sampleId][1] : 0.5f); + SIMD::Float((state.multiSampleCount > 1) ? Constants::VkSampleLocations4[sampleId][1] : 0.5f); } // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is // handled separately, through the cMask. auto activeLaneMask = SIMD::Int(0xFFFFFFFF); - auto storesAndAtomicsMask = (sampleId >= 0) ? maskAny(cMask[sampleId], sMask[sampleId], zMask[sampleId]) : maskAny(cMask, sMask, zMask); + auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples); routine.killMask = 0; spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, state.multiSampleCount); spirvShader->emitEpilog(&routine); - if((sampleId < 0) || (sampleId == static_cast<int>(state.multiSampleCount - 1))) + // At the last invocation of the fragment shader, clear phi data. + // TODO(b/178662288): Automatically clear phis through SpirvRoutine lifetime reduction. + if(samples[0] == static_cast<int>(state.multiSampleCount - 1)) { spirvShader->clearPhis(&routine); } @@ -215,9 +222,9 @@ if(spirvShader->getModes().ContainsKill) { - for(auto i = sampleLoopInit; i < sampleLoopEnd; i++) + for(unsigned int q : samples) { - cMask[i] &= ~routine.killMask; + cMask[q] &= ~routine.killMask; } } @@ -226,9 +233,9 @@ { auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]); - for(auto i = sampleLoopInit; i < sampleLoopEnd; i++) + for(unsigned int q : samples) { - cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << i), SIMD::Int(0))); + cMask[q] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << q), SIMD::Int(0))); } } @@ -243,23 +250,17 @@ } } -Bool PixelProgram::alphaTest(Int cMask[4], int sampleId) +Bool PixelProgram::alphaTest(Int cMask[4], const SampleSet &samples) { if(!state.alphaToCoverage) { return true; } - alphaToCoverage(cMask, c[0].w, sampleId); + alphaToCoverage(cMask, c[0].w, samples); - if(sampleId >= 0) - { - return cMask[sampleId] != 0x0; - } - - Int pass = cMask[0]; - - for(unsigned int q = 1; q < state.multiSampleCount; q++) + Int pass = 0; + for(unsigned int q : samples) { pass = pass | cMask[q]; } @@ -267,11 +268,8 @@ return pass != 0x0; } -void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], int sampleId) +void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples) { - unsigned int sampleLoopInit = (sampleId >= 0) ? sampleId : 0; - unsigned int sampleLoopEnd = (sampleId >= 0) ? sampleId + 1 : state.multiSampleCount; - for(int index = 0; index < RENDERTARGETS; index++) { if(!state.colorWriteActive(index)) @@ -296,21 +294,18 @@ case VK_FORMAT_A8B8G8R8_SRGB_PACK32: case VK_FORMAT_A2B10G10R10_UNORM_PACK32: case VK_FORMAT_A2R10G10B10_UNORM_PACK32: - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { - if(state.multiSampleMask & (1 << q)) - { - Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); - Vector4s color; + Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); + Vector4s color; - color.x = convertFixed16(c[index].x, false); - color.y = convertFixed16(c[index].y, false); - color.z = convertFixed16(c[index].z, false); - color.w = convertFixed16(c[index].w, false); + color.x = convertFixed16(c[index].x, false); + color.y = convertFixed16(c[index].y, false); + color.z = convertFixed16(c[index].z, false); + color.w = convertFixed16(c[index].w, false); - alphaBlend(index, buffer, color, x); - writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); - } + alphaBlend(index, buffer, color, x); + writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); } break; case VK_FORMAT_R16_SFLOAT: @@ -342,16 +337,13 @@ case VK_FORMAT_A8B8G8R8_SINT_PACK32: case VK_FORMAT_A2B10G10R10_UINT_PACK32: case VK_FORMAT_A2R10G10B10_UINT_PACK32: - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { - if(state.multiSampleMask & (1 << q)) - { - Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); - Vector4f color = c[index]; + Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); + Vector4f color = c[index]; - alphaBlend(index, buffer, color, x); - writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); - } + alphaBlend(index, buffer, color, x); + writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); } break; default:
diff --git a/src/Pipeline/PixelProgram.hpp b/src/Pipeline/PixelProgram.hpp index d2d2301..306c3da 100644 --- a/src/Pipeline/PixelProgram.hpp +++ b/src/Pipeline/PixelProgram.hpp
@@ -24,20 +24,17 @@ public: PixelProgram( const PixelProcessor::State &state, - vk::PipelineLayout const *pipelineLayout, - SpirvShader const *spirvShader, - const vk::DescriptorSet::Bindings &descriptorSets) - : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets) - { - } + const vk::PipelineLayout *pipelineLayout, + const SpirvShader *spirvShader, + const vk::DescriptorSet::Bindings &descriptorSets); virtual ~PixelProgram() {} protected: - virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], int sampleId); - virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4], int sampleId); - virtual Bool alphaTest(Int cMask[4], int sampleId); - virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], int sampleId); + virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples); + virtual void executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples); + virtual Bool alphaTest(Int cMask[4], const SampleSet &samples); + virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples); private: // Color outputs @@ -46,9 +43,8 @@ // Raster operations void clampColor(Vector4f oC[RENDERTARGETS]); - Int4 maskAny(Int cMask[4]) const; - Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const; - Int4 maskAny(Int cMask, Int sMask, Int zMask) const; + static Int4 maskAny(Int cMask[4], const SampleSet &samples); + static Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples); }; } // namespace sw
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp index 543b1c9..031a028 100644 --- a/src/Pipeline/PixelRoutine.cpp +++ b/src/Pipeline/PixelRoutine.cpp
@@ -44,6 +44,7 @@ // Clearing inputs to 0 is not demanded by the spec, // but it makes the undefined behavior deterministic. + // TODO(b/155148722): Remove to detect UB. for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) { routine.inputs[i] = Float4(0.0f); @@ -60,9 +61,27 @@ { } +PixelRoutine::SampleSet PixelRoutine::getSampleSet(int invocation) const +{ + unsigned int sampleBegin = perSampleShading ? invocation : 0; + unsigned int sampleEnd = perSampleShading ? (invocation + 1) : state.multiSampleCount; + + SampleSet samples; + + for(unsigned int q = sampleBegin; q < sampleEnd; q++) + { + if(state.multiSampleMask & (1 << q)) + { + samples.push_back(q); + } + } + + return samples; +} + void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) { - const bool earlyDepthTest = !spirvShader || spirvShader->getModes().EarlyFragmentTests; + const bool earlyFragmentTests = !spirvShader || spirvShader->getModes().EarlyFragmentTests; Int zMask[4]; // Depth mask Int sMask[4]; // Stencil mask @@ -70,17 +89,20 @@ for(int invocation = 0; invocation < invocationCount; invocation++) { - int sampleId = perSampleShading ? invocation : -1; - unsigned int sampleLoopInit = perSampleShading ? sampleId : 0; - unsigned int sampleLoopEnd = perSampleShading ? sampleId + 1 : state.multiSampleCount; + SampleSet samples = getSampleSet(invocation); - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + if(samples.empty()) + { + continue; + } + + for(unsigned int q : samples) { zMask[q] = cMask[q]; sMask[q] = cMask[q]; } - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { stencilTest(sBuffer, q, x, sMask[q], cMask[q]); } @@ -92,7 +114,7 @@ if(interpolateZ()) { - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { Float4 x = xxxx; @@ -118,16 +140,16 @@ Bool depthPass = false; - if(earlyDepthTest) + if(earlyFragmentTests) { - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); depthBoundsTest(zBuffer, q, x, zMask[q], cMask[q]); } } - If(depthPass || Bool(!earlyDepthTest)) + If(depthPass || Bool(!earlyFragmentTests)) { Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16); @@ -139,7 +161,7 @@ { Float4 WWWW(1.0e-9f); - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]); YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]); @@ -182,8 +204,8 @@ if(perSampleShading && (state.multiSampleCount > 1)) { - xxxx += Float4(Constants::SampleLocationsX[sampleId]); - yyyy += Float4(Constants::SampleLocationsY[sampleId]); + xxxx += Float4(Constants::SampleLocationsX[samples[0]]); + yyyy += Float4(Constants::SampleLocationsY[samples[0]]); } for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++) @@ -215,7 +237,7 @@ } } - setBuiltins(x, y, unclampedZ, w, cMask, sampleId); + setBuiltins(x, y, unclampedZ, w, cMask, samples); for(uint32_t i = 0; i < state.numClipDistances; i++) { @@ -224,12 +246,12 @@ false, true); auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0))); - for(auto ms = sampleLoopInit; ms < sampleLoopEnd; ms++) + for(unsigned int q : samples) { // FIXME(b/148105887): Fragments discarded by clipping do not exist at // all -- they should not be counted in queries or have their Z/S effects // performed when early fragment tests are enabled. - cMask[ms] &= clipMask; + cMask[q] &= clipMask; } if(spirvShader->getUsedCapabilities().ClipDistance) @@ -264,19 +286,16 @@ } } - Bool alphaPass = true; - if(spirvShader) { - bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests); - applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask, sampleId); + executeShader(cMask, earlyFragmentTests ? sMask : cMask, earlyFragmentTests ? zMask : cMask, samples); } - alphaPass = alphaTest(cMask, sampleId); + Bool alphaPass = alphaTest(cMask, samples); if((spirvShader && spirvShader->getModes().ContainsKill) || state.alphaToCoverage) { - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { zMask[q] &= cMask[q]; sMask[q] &= cMask[q]; @@ -285,41 +304,35 @@ If(alphaPass) { - if(!earlyDepthTest) + if(!earlyFragmentTests) { - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); depthBoundsTest(zBuffer, q, x, zMask[q], cMask[q]); } } - If(depthPass || Bool(earlyDepthTest)) + If(depthPass || Bool(earlyFragmentTests)) { - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { - if(state.multiSampleMask & (1 << q)) - { - writeDepth(zBuffer, q, x, z[q], zMask[q]); + writeDepth(zBuffer, q, x, z[q], zMask[q]); - if(state.occlusionEnabled) - { - occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q])); - } + if(state.occlusionEnabled) + { + occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q])); } } - rasterOperation(cBuffer, x, sMask, zMask, cMask, sampleId); + rasterOperation(cBuffer, x, sMask, zMask, cMask, samples); } } } - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { - if(state.multiSampleMask & (1 << q)) - { - writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); - } + writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); } } } @@ -640,7 +653,7 @@ } } -void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha, int sampleId) +void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha, const SampleSet &samples) { static const int a2c[4] = { OFFSET(DrawData, a2c0), @@ -649,10 +662,7 @@ OFFSET(DrawData, a2c3), }; - unsigned int sampleLoopInit = (sampleId >= 0) ? sampleId : 0; - unsigned int sampleLoopEnd = (sampleId >= 0) ? sampleId + 1 : state.multiSampleCount; - - for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++) + for(unsigned int q : samples) { Int4 coverage = CmpNLT(alpha, *Pointer<Float4>(data + a2c[q])); Int aMask = SignMask(coverage);
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp index fd2f9a1..edb7eb4 100644 --- a/src/Pipeline/PixelRoutine.hpp +++ b/src/Pipeline/PixelRoutine.hpp
@@ -17,6 +17,8 @@ #include "Device/QuadRasterizer.hpp" +#include <vector> + namespace sw { class PixelShader; @@ -33,6 +35,8 @@ virtual ~PixelRoutine(); protected: + using SampleSet = std::vector<int>; + Float4 z[4]; // Multisampled z Float4 w; // Used as is Float4 rhw; // Reciprocal w @@ -45,15 +49,15 @@ // Depth output Float4 oDepth; - virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], int sampleId) = 0; - virtual void applyShader(Int cMask[4], Int sMask[4], Int zMask[4], int sampleId) = 0; - virtual Bool alphaTest(Int cMask[4], int sampleId) = 0; - virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], int sampleId) = 0; + virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples) = 0; + virtual void executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples) = 0; + virtual Bool alphaTest(Int cMask[4], const SampleSet &samples) = 0; + virtual void rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples) = 0; void quad(Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) override; void alphaTest(Int &aMask, const Short4 &alpha); - void alphaToCoverage(Int cMask[4], const Float4 &alpha, int sampleId); + void alphaToCoverage(Int cMask[4], const Float4 &alpha, const SampleSet &samples); // Raster operations void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s ¤t, const Int &x); @@ -102,6 +106,8 @@ const bool shaderContainsSampleQualifier; const bool perSampleShading; const int invocationCount; + + SampleSet getSampleSet(int invocation) const; }; } // namespace sw