// blob: ba904c1fd7217b95ca6e09a36e5958f78ab418a5 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "PixelProgram.hpp"
#include "SamplerCore.hpp"
#include "Device/Primitive.hpp"
#include "Device/Renderer.hpp"
namespace sw
{
// Combines the coverage masks of all samples in use and expands the result into
// 4 booleans, one per SIMD lane.
Int4 PixelProgram::maskAny(Int cMask[4]) const
{
	// A lane counts as used when at least one sample has its coverage bit set.
	Int anySample = cMask[0];
	for(unsigned int q = 1; q < state.multiSample; q++)
	{
		anySample = anySample | cMask[q];
	}

	// Bit n of the combined mask belongs to lane n. Isolate it, move it up to the
	// lane's most significant bit, then shift it back down by 31 to produce the boolean.
	Int4 bitOfLane = Int4(1, 2, 4, 8);
	Int4 shiftToMSB = Int4(31, 30, 29, 28);
	return ((Int4(anySample) & bitOfLane) << shiftToMSB) >> Int4(31);
}
// Combines, over all samples in use, the bits that are set in the coverage, stencil
// and depth masks simultaneously, and expands the result into 4 booleans, one per
// SIMD lane.
Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const
{
	// A lane counts as used when at least one sample has its bit set in all three masks.
	Int anySample = cMask[0] & sMask[0] & zMask[0];
	for(unsigned int q = 1; q < state.multiSample; q++)
	{
		anySample = anySample | (cMask[q] & sMask[q] & zMask[q]);
	}

	// Bit n of the combined mask belongs to lane n. Isolate it, move it up to the
	// lane's most significant bit, then shift it back down by 31 to produce the boolean.
	Int4 bitOfLane = Int4(1, 2, 4, 8);
	Int4 shiftToMSB = Int4(31, 30, 29, 28);
	return ((Int4(anySample) & bitOfLane) << shiftToMSB) >> Int4(31);
}
// Supplies the values of the shader's input builtins for the current group of four
// lanes. The lanes are laid out as a 2x2 block of pixels starting at (x, y): the
// per-lane offsets used below are (0,0), (1,0), (0,1) and (1,1).
//
// x, y   - integer position of the first lane.
// z      - per-sample depth values; only sample 0 is exposed through FragCoord.
// w      - value stored in the fourth FragCoord component.
// cMask  - per-sample coverage masks, used to derive the helper invocation flags.
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
{
	routine.setImmutableInputBuiltins(spirvShader);

	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& mapping, Array<SIMD::Float>& components)
	{
		assert(mapping.SizeInComponents == 1);
		// The integer view ID is replicated to all lanes and stored as raw bits.
		auto viewIndex = Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID))));
		components[mapping.FirstComponent] = As<Float4>(viewIndex);
	});

	routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping& mapping, Array<SIMD::Float>& components)
	{
		assert(mapping.SizeInComponents == 4);
		const auto first = mapping.FirstComponent;
		// The 0.5 / 1.5 offsets place each lane at the center of its pixel.
		components[first + 0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f);
		components[first + 1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f);
		components[first + 2] = z[0]; // sample 0
		components[first + 3] = w;
	});

	routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping& mapping, Array<SIMD::Float>& components)
	{
		assert(mapping.SizeInComponents == 2);
		const auto first = mapping.FirstComponent;
		// Pixel center position relative to the primitive's pointCoordX / pointCoordY.
		components[first + 0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) +
			SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX))));
		components[first + 1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) +
			SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY))));
	});

	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& mapping, Array<SIMD::Float>& components)
	{
		assert(mapping.SizeInComponents == 1);
		components[mapping.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
	});

	routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& mapping, Array<SIMD::Float>& components)
	{
		assert(mapping.SizeInComponents == 1);
		// A lane is a helper invocation when none of its samples are covered.
		Int4 coveredLanes = maskAny(cMask);
		components[mapping.FirstComponent] = As<SIMD::Float>(~coveredLanes);
	});

	// Per-lane integer window coordinates of the 2x2 block, plus the view being rendered.
	routine.windowSpacePosition[0] = x + SIMD::Int(0, 1, 0, 1);
	routine.windowSpacePosition[1] = y + SIMD::Int(0, 0, 1, 1);
	routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
}
// Emits the fragment shader for the current group of lanes and gathers its results:
//  1. binds the per-draw resources and the FrontFacing / SampleMask input builtins,
//  2. emits the shader body and its epilog,
//  3. copies the outputs into c[] and clamps them,
//  4. removes killed lanes and lanes rejected by the SampleMask output from cMask,
//  5. stores the FragDepth output, clamped to [0, 1], in oDepth.
void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
{
	// Resources the shader accesses, all located inside DrawData.
	routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
	routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
	routine.pushConstants = data + OFFSET(DrawData, pushConstants);
	routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants));

	auto frontFacingInput = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing);
	if(frontFacingInput != spirvShader->inputBuiltins.end())
	{
		const auto &mapping = frontFacingInput->second;
		ASSERT(mapping.SizeInComponents == 1);
		// The primitive's clockwise mask is replicated to all lanes.
		auto clockwise = Int4(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
		routine.getVariable(mapping.Id)[mapping.FirstComponent] = As<Float4>(clockwise);
	}

	auto sampleMaskInput = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
	if(sampleMaskInput != spirvShader->inputBuiltins.end())
	{
		static_assert(SIMD::Width == 4, "Expects SIMD width to be 4");
		const auto &mapping = sampleMaskInput->second;

		// cMask[q] carries one bit per lane for sample q; the shader expects one bit
		// per sample for each lane, so the masks are transposed here.
		Int4 bitOfLane = Int4(1, 2, 4, 8);
		Int4 coveredSamples = Int4(1) & CmpNEQ(Int4(cMask[0]) & bitOfLane, Int4(0));
		for(unsigned int q = 1; q < state.multiSample; q++)
		{
			coveredSamples |= Int4(1 << q) & CmpNEQ(Int4(cMask[q]) & bitOfLane, Int4(0));
		}
		routine.getVariable(mapping.Id)[mapping.FirstComponent] = As<Float4>(coveredSamples);

		// Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
		// Fill any non-zero indices with 0.
		for(unsigned int word = 1; word < mapping.SizeInComponents; word++)
		{
			routine.getVariable(mapping.Id)[mapping.FirstComponent + word] = Float4(0);
		}
	}

	// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
	// handled separately, through the cMask.
	auto allLanesActive = SIMD::Int(0xFFFFFFFF);
	// Lanes that have at least one sample set in the coverage, stencil and depth masks.
	auto lanesWithLiveSample = maskAny(cMask, sMask, zMask);
	routine.killMask = 0;

	spirvShader->emit(&routine, allLanesActive, lanesWithLiveSample, descriptorSets);
	spirvShader->emitEpilog(&routine);

	// Each render target owns four consecutive output components.
	for(int target = 0; target < RENDERTARGETS; target++)
	{
		const int base = target * 4;
		c[target].x = routine.outputs[base + 0];
		c[target].y = routine.outputs[base + 1];
		c[target].z = routine.outputs[base + 2];
		c[target].w = routine.outputs[base + 3];
	}

	clampColor(c);

	// Lanes recorded in the kill mask lose coverage in every sample.
	if(spirvShader->getModes().ContainsKill)
	{
		for(unsigned int q = 0; q < state.multiSample; q++)
		{
			cMask[q] &= ~routine.killMask;
		}
	}

	auto sampleMaskOutput = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
	if(sampleMaskOutput != spirvShader->outputBuiltins.end())
	{
		const auto &mapping = sampleMaskOutput->second;
		auto shaderSampleMask = As<SIMD::Int>(routine.getVariable(mapping.Id)[mapping.FirstComponent]);

		// Keep sample q of a lane only if the shader left bit q set for that lane.
		// NOTE(review): SignMask presumably packs the per-lane comparison result into
		// one bit per lane, matching the cMask layout - confirm against Reactor.
		for(unsigned int q = 0; q < state.multiSample; q++)
		{
			cMask[q] &= SignMask(CmpNEQ(shaderSampleMask & SIMD::Int(1 << q), SIMD::Int(0)));
		}
	}

	auto fragDepthOutput = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
	if(fragDepthOutput != spirvShader->outputBuiltins.end())
	{
		const auto &mapping = fragDepthOutput->second;
		oDepth = Min(Max(routine.getVariable(mapping.Id)[mapping.FirstComponent], Float4(0.0f)), Float4(1.0f));
	}
}
// Applies alpha-to-coverage, when enabled, using the alpha of color output 0, and
// reports whether any coverage remains afterwards. Always passes when
// alpha-to-coverage is disabled.
Bool PixelProgram::alphaTest(Int cMask[4])
{
	if(state.alphaToCoverage)
	{
		alphaToCoverage(cMask, c[0].w);

		// The test passes if any bit survives in any sample's coverage mask.
		Int remaining = cMask[0];
		for(auto q = 1u; q < state.multiSample; q++)
		{
			remaining |= cMask[q];
		}

		return remaining != 0x0;
	}

	return true;
}
// Blends the shader's color outputs with, and writes them to, every render target
// that has color writes enabled. This is done once per sample that is selected by
// state.multiSampleMask.
//
// cBuffer             - base pointer of each render target; sample q is located
//                       q * colorSliceB[target] bytes further.
// x                   - forwarded to alphaBlend() and writeColor().
// sMask, zMask, cMask - per-sample stencil, depth and coverage masks forwarded to
//                       writeColor().
void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
{
	for(int target = 0; target < RENDERTARGETS; target++)
	{
		if(!state.colorWriteActive(target))
		{
			continue;
		}

		auto targetFormat = state.targetFormat[target];

		switch(targetFormat)
		{
		// Normalized formats: blended and written as 16-bit fixed-point values.
		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
		case VK_FORMAT_R5G6B5_UNORM_PACK16:
		case VK_FORMAT_B8G8R8A8_UNORM:
		case VK_FORMAT_B8G8R8A8_SRGB:
		case VK_FORMAT_R8G8B8A8_UNORM:
		case VK_FORMAT_R8G8B8A8_SRGB:
		case VK_FORMAT_R8G8_UNORM:
		case VK_FORMAT_R8_UNORM:
		case VK_FORMAT_R16G16_UNORM:
		case VK_FORMAT_R16G16B16A16_UNORM:
		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
			for(unsigned int sample = 0; sample < state.multiSample; sample++)
			{
				Pointer<Byte> sampleBuffer = cBuffer[target] + sample * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[target]));
				Vector4s fixedColor;

				// NOTE(review): the 0xFBFF / 0xFDFF scale factors are used only for the
				// 5- and 6-bit channels; presumably they account for the later
				// reduction in precision - confirm.
				switch(targetFormat)
				{
				case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
					fixedColor.x = UShort4(c[target].x * Float4(0xFBFF), false);
					fixedColor.y = UShort4(c[target].y * Float4(0xFBFF), false);
					fixedColor.z = UShort4(c[target].z * Float4(0xFBFF), false);
					fixedColor.w = UShort4(c[target].w * Float4(0xFFFF), false);
					break;
				case VK_FORMAT_R5G6B5_UNORM_PACK16:
					fixedColor.x = UShort4(c[target].x * Float4(0xFBFF), false);
					fixedColor.y = UShort4(c[target].y * Float4(0xFDFF), false);
					fixedColor.z = UShort4(c[target].z * Float4(0xFBFF), false);
					fixedColor.w = UShort4(c[target].w * Float4(0xFFFF), false);
					break;
				default:
					fixedColor.x = convertFixed16(c[target].x, false);
					fixedColor.y = convertFixed16(c[target].y, false);
					fixedColor.z = convertFixed16(c[target].z, false);
					fixedColor.w = convertFixed16(c[target].w, false);
					break;
				}

				// Samples excluded by the multisample mask are left untouched.
				if((state.multiSampleMask & (1 << sample)) == 0)
				{
					continue;
				}

				alphaBlend(target, sampleBuffer, fixedColor, x);
				writeColor(target, sampleBuffer, x, fixedColor, sMask[sample], zMask[sample], cMask[sample]);
			}
			break;

		// Floating-point and integer formats: blended and written from the
		// unconverted Vector4f shader output.
		case VK_FORMAT_R16_SFLOAT:
		case VK_FORMAT_R16G16_SFLOAT:
		case VK_FORMAT_R16G16B16A16_SFLOAT:
		case VK_FORMAT_R32_SFLOAT:
		case VK_FORMAT_R32G32_SFLOAT:
		case VK_FORMAT_R32G32B32A32_SFLOAT:
		case VK_FORMAT_R32_SINT:
		case VK_FORMAT_R32G32_SINT:
		case VK_FORMAT_R32G32B32A32_SINT:
		case VK_FORMAT_R32_UINT:
		case VK_FORMAT_R32G32_UINT:
		case VK_FORMAT_R32G32B32A32_UINT:
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16G16_SINT:
		case VK_FORMAT_R16G16B16A16_SINT:
		case VK_FORMAT_R16_UINT:
		case VK_FORMAT_R16G16_UINT:
		case VK_FORMAT_R16G16B16A16_UINT:
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8B8A8_SINT:
		case VK_FORMAT_R8_UINT:
		case VK_FORMAT_R8G8_UINT:
		case VK_FORMAT_R8G8B8A8_UINT:
		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
			for(unsigned int sample = 0; sample < state.multiSample; sample++)
			{
				Pointer<Byte> sampleBuffer = cBuffer[target] + sample * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[target]));
				// Start from a fresh copy of the shader output for every sample.
				Vector4f floatColor = c[target];

				// Samples excluded by the multisample mask are left untouched.
				if((state.multiSampleMask & (1 << sample)) == 0)
				{
					continue;
				}

				alphaBlend(target, sampleBuffer, floatColor, x);
				writeColor(target, sampleBuffer, x, floatColor, sMask[sample], zMask[sample], cMask[sample]);
			}
			break;

		default:
			UNIMPLEMENTED("VkFormat: %d", int(targetFormat));
		}
	}
}
// Clamps the shader's color outputs to [0, 1] for render targets with a normalized
// format. Outputs for floating-point and integer formats are left unchanged.
// Render targets without active color writes are skipped, except target 0 when
// alpha-to-coverage is enabled.
void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
{
	for(int target = 0; target < RENDERTARGETS; target++)
	{
		const bool colorIsUsed = state.colorWriteActive(target) || (target == 0 && state.alphaToCoverage);
		if(!colorIsUsed)
		{
			continue;
		}

		switch(state.targetFormat[target])
		{
		case VK_FORMAT_UNDEFINED:
			break;

		// Normalized formats: restrict every component to [0, 1].
		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
		case VK_FORMAT_R5G6B5_UNORM_PACK16:
		case VK_FORMAT_B8G8R8A8_UNORM:
		case VK_FORMAT_B8G8R8A8_SRGB:
		case VK_FORMAT_R8G8B8A8_UNORM:
		case VK_FORMAT_R8G8B8A8_SRGB:
		case VK_FORMAT_R8G8_UNORM:
		case VK_FORMAT_R8_UNORM:
		case VK_FORMAT_R16G16_UNORM:
		case VK_FORMAT_R16G16B16A16_UNORM:
		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
			oC[target].x = Min(Max(oC[target].x, Float4(0.0f)), Float4(1.0f));
			oC[target].y = Min(Max(oC[target].y, Float4(0.0f)), Float4(1.0f));
			oC[target].z = Min(Max(oC[target].z, Float4(0.0f)), Float4(1.0f));
			oC[target].w = Min(Max(oC[target].w, Float4(0.0f)), Float4(1.0f));
			break;

		// Floating-point and integer formats: no clamping.
		case VK_FORMAT_R32_SFLOAT:
		case VK_FORMAT_R32G32_SFLOAT:
		case VK_FORMAT_R32G32B32A32_SFLOAT:
		case VK_FORMAT_R32_SINT:
		case VK_FORMAT_R32G32_SINT:
		case VK_FORMAT_R32G32B32A32_SINT:
		case VK_FORMAT_R32_UINT:
		case VK_FORMAT_R32G32_UINT:
		case VK_FORMAT_R32G32B32A32_UINT:
		case VK_FORMAT_R16_SFLOAT:
		case VK_FORMAT_R16G16_SFLOAT:
		case VK_FORMAT_R16G16B16A16_SFLOAT:
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16G16_SINT:
		case VK_FORMAT_R16G16B16A16_SINT:
		case VK_FORMAT_R16_UINT:
		case VK_FORMAT_R16G16_UINT:
		case VK_FORMAT_R16G16B16A16_UINT:
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8B8A8_SINT:
		case VK_FORMAT_R8_UINT:
		case VK_FORMAT_R8G8_UINT:
		case VK_FORMAT_R8G8B8A8_UINT:
		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
			break;

		default:
			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[target]));
		}
	}
}
// Approximates x^(1.0/2.2) as 1.14 * sqrt(x) - 0.14 * x, clamped to [0, 1].
Float4 PixelProgram::linearToSRGB(const Float4 &x)
{
	// sqrt(x) is obtained as the reciprocal of the reciprocal square root.
	// NOTE(review): the _pp variants are presumably reduced-precision - confirm.
	Float4 root = Rcp_pp(RcpSqrt_pp(x));
	Float4 unclamped = root * Float4(1.14f) - x * Float4(0.14f);

	Float4 atLeastZero = Max(unclamped, Float4(0.0f));
	return Min(atLeastZero, Float4(1.0f));
}
}