Fix component normalization for texture gather
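Also refactored the existing normalization code so that the per-component scale computation (getComponentScale) is shared by the 32-bit filtering, 16-bit filtering, and border color paths.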
Bug: b/205576016
Tests: dEQP-VK.pipeline.sampler.border_swizzle.*pack*
Change-Id: I02d965aa2be9cf349febca2f4a12a9a4456592d5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/60048
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Commit-Queue: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index ab00880..80c467a 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -142,6 +142,9 @@
bool use32BitFiltering = hasFloatTexture() || hasUnnormalizedIntegerTexture() || force32BitFiltering ||
state.isCube() || state.unnormalizedCoordinates || state.compareEnable ||
borderModeActive() || (function == Gather) || (function == Fetch);
+ const sw::float4 compScale = getComponentScale();
+ int gatherComponent = (function == Gather) ? getGatherComponent() : 0;
+ int numComponents = (function == Gather) ? 4 : textureComponentCount();
if(use32BitFiltering)
{
@@ -149,76 +152,9 @@
if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable)
{
- switch(state.textureFormat)
+ for(int component = 0; component < numComponents; component++)
{
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- case VK_FORMAT_B5G6R5_UNORM_PACK16:
- c.x *= Float4(1.0f / 0xF800);
- c.y *= Float4(1.0f / 0xFC00);
- c.z *= Float4(1.0f / 0xF800);
- break;
- case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
- case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
- case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
- case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
- c.x *= Float4(1.0f / 0xF000);
- c.y *= Float4(1.0f / 0xF000);
- c.z *= Float4(1.0f / 0xF000);
- c.w *= Float4(1.0f / 0xF000);
- break;
- case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
- case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- c.x *= Float4(1.0f / 0xF800);
- c.y *= Float4(1.0f / 0xF800);
- c.z *= Float4(1.0f / 0xF800);
- c.w *= Float4(1.0f / 0x8000);
- break;
- case VK_FORMAT_R8_SNORM:
- case VK_FORMAT_R8G8_SNORM:
- case VK_FORMAT_R8G8B8A8_SNORM:
- case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
- c.x = Max(c.x * Float4(1.0f / 0x7F00), Float4(-1.0f));
- c.y = Max(c.y * Float4(1.0f / 0x7F00), Float4(-1.0f));
- c.z = Max(c.z * Float4(1.0f / 0x7F00), Float4(-1.0f));
- c.w = Max(c.w * Float4(1.0f / 0x7F00), Float4(-1.0f));
- break;
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_R8_SRGB:
- case VK_FORMAT_R8G8_SRGB:
- c.x *= Float4(1.0f / 0xFF00u);
- c.y *= Float4(1.0f / 0xFF00u);
- c.z *= Float4(1.0f / 0xFF00u);
- c.w *= Float4(1.0f / 0xFF00u);
- break;
- //TODO(b/205576016)
- case VK_FORMAT_R16_UNORM:
- case VK_FORMAT_R16G16_UNORM:
- case VK_FORMAT_R16G16B16A16_UNORM:
- c.x *= Float4(1.0f / 0xFFFF);
- c.y *= Float4(1.0f / 0xFFFF);
- c.z *= Float4(1.0f / 0xFFFF);
- c.w *= Float4(1.0f / 0xFFFF);
- break;
- case VK_FORMAT_R16_SNORM:
- case VK_FORMAT_R16G16_SNORM:
- case VK_FORMAT_R16G16B16A16_SNORM:
- c.x = Max(c.x * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- c.y = Max(c.y * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- c.z = Max(c.z * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- c.w = Max(c.w * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- break;
- default:
- for(int component = 0; component < textureComponentCount(); component++)
- {
- c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
- }
+ c[component] *= Float4(1.0f / compScale[(function == Gather) ? gatherComponent : component]);
}
}
}
@@ -226,74 +162,26 @@
{
Vector4s cs = sampleFilter(texture, u, v, w, a, offset, sample, lod, anisotropy, uDelta, vDelta, function);
- switch(state.textureFormat)
+ for(int component = 0; component < numComponents; component++)
{
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- case VK_FORMAT_B5G6R5_UNORM_PACK16:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
- break;
- case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
- case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
- case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
- case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
- c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
- break;
- case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
- case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
- case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
- c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000);
- break;
- case VK_FORMAT_R8_SNORM:
- case VK_FORMAT_R8G8_SNORM:
- case VK_FORMAT_R8G8B8A8_SNORM:
- case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
- c.x = Max(Float4(cs.x) * Float4(1.0f / 0x7F00), Float4(-1.0f));
- c.y = Max(Float4(cs.y) * Float4(1.0f / 0x7F00), Float4(-1.0f));
- c.z = Max(Float4(cs.z) * Float4(1.0f / 0x7F00), Float4(-1.0f));
- c.w = Max(Float4(cs.w) * Float4(1.0f / 0x7F00), Float4(-1.0f));
- break;
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8G8_UNORM:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_R8G8B8A8_SRGB:
- case VK_FORMAT_R8_SRGB:
- case VK_FORMAT_R8G8_SRGB:
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u);
- c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u);
- break;
- case VK_FORMAT_R16_SNORM:
- case VK_FORMAT_R16G16_SNORM:
- case VK_FORMAT_R16G16B16A16_SNORM:
- c.x = Max(Float4(cs.x) * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- c.y = Max(Float4(cs.y) * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- c.z = Max(Float4(cs.z) * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- c.w = Max(Float4(cs.w) * Float4(1.0f / 0x7FFF), Float4(-1.0f));
- break;
- default:
- for(int component = 0; component < textureComponentCount(); component++)
+ if(hasUnsignedTextureComponent(component))
{
- if(hasUnsignedTextureComponent(component))
- {
- convertUnsigned16(c[component], cs[component]);
- }
- else
- {
- convertSigned15(c[component], cs[component]);
- }
+ c[component] = Float4(As<UShort4>(cs[component]));
}
+ else
+ {
+ c[component] = Float4(cs[component]);
+ }
+
+ c[component] *= Float4(1.0f / compScale[(function == Gather) ? gatherComponent : component]);
+ }
+ }
+
+ if(state.textureFormat.isSignedNormalized())
+ {
+ for(int component = 0; component < numComponents; component++)
+ {
+ c[component] = Max(c[component], Float4(-1.0f));
}
}
@@ -2179,19 +2067,7 @@
Vector4i border;
const bool scaled = !hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable;
- const sw::float4 scale = state.textureFormat.getScale();
- const sw::int4 bits = state.textureFormat.bitsPerComponent();
- const sw::int4 shift = sw::int4(std::max(16 - bits.x, 0), std::max(16 - bits.y, 0), std::max(16 - bits.z, 0), std::max(16 - bits.w, 0));
- sw::float4 scaleComp = scaled ? sw::float4(static_cast<uint16_t>(scale.x) << shift.x, static_cast<uint16_t>(scale.y) << shift.y,
- static_cast<uint16_t>(scale.z) << shift.z, static_cast<uint16_t>(scale.w) << shift.w)
- : sw::float4(1.0, 1.0, 1.0, 1.0);
- // TODO(b/204709464): Unlike other formats, the fixed point presentation of the formats below are handled with bit extension.
- // This special handling of such formats should be removed later.
- const VkFormat format = static_cast<VkFormat>(state.textureFormat);
- if(format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 || format == VK_FORMAT_A2R10G10B10_UNORM_PACK32)
- scaleComp = sw::float4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
- else if(format == VK_FORMAT_A2B10G10R10_SNORM_PACK32 || format == VK_FORMAT_A2R10G10B10_SNORM_PACK32)
- scaleComp = sw::float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF);
+ const sw::float4 scaleComp = scaled ? getComponentScale() : sw::float4(1.0f, 1.0f, 1.0f, 1.0f);
switch(state.border)
{
@@ -2608,16 +2484,6 @@
}
}
-void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
-{
- cf = Float4(cs) * Float4(1.0f / 0x7FFF);
-}
-
-void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
-{
- cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
-}
-
void SamplerCore::sRGBtoLinearFF00(Short4 &c)
{
c = As<UShort4>(c) >> 8;
@@ -2701,4 +2567,56 @@
}
}
+sw::float4 SamplerCore::getComponentScale() const  // Returns, per component, the fixed-point scale of state.textureFormat; callers in this change multiply texels by its reciprocal to normalize them.
+{
+ // TODO(b/204709464): Unlike other formats, the fixed-point representation of the formats below is handled with bit extension.
+ // This special handling of such formats should be removed later.
+ const VkFormat format = static_cast<VkFormat>(state.textureFormat);
+ switch(format)
+ {
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ return sw::float4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);  // Same full 16-bit unsigned scale for all four components.
+ case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ return sw::float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF);  // NOTE(review): the two 4:2:0 UNORM formats share the 15-bit scale with the SNORM formats; presumably their samples are produced as signed 15-bit values - confirm.
+ default:
+ break;
+ };
+
+ const sw::float4 scale = state.textureFormat.getScale();  // NOTE(review): presumably the maximum integer value of each component at its native bit width - confirm against Format::getScale().
+ const sw::int4 bits = state.textureFormat.bitsPerComponent();
+ const sw::int4 shift = sw::int4(std::max(16 - bits.x, 0), std::max(16 - bits.y, 0), std::max(16 - bits.z, 0),  // Left shift that moves a component narrower than 16 bits to the top of a 16-bit value;
+ std::max(16 - bits.w, 0));  // clamped to 0 so components of 16 bits or more are not shifted.
+
+ return sw::float4(static_cast<uint16_t>(scale.x) << shift.x,  // Each scale is truncated to 16 bits, then shifted by the amount computed above.
+ static_cast<uint16_t>(scale.y) << shift.y,
+ static_cast<uint16_t>(scale.z) << shift.z,
+ static_cast<uint16_t>(scale.w) << shift.w);
+}
+
+int SamplerCore::getGatherComponent() const  // Maps the value returned by gatherSwizzle() to the index of the component whose scale is used to normalize gather results.
+{
+ VkComponentSwizzle swizzle = gatherSwizzle();
+
+ switch(swizzle)
+ {
+ default: UNSUPPORTED("VkComponentSwizzle %d", (int)swizzle); return 0;  // Any other swizzle value is reported as unsupported and falls back to component 0.
+ case VK_COMPONENT_SWIZZLE_R:
+ case VK_COMPONENT_SWIZZLE_G:
+ case VK_COMPONENT_SWIZZLE_B:
+ case VK_COMPONENT_SWIZZLE_A:
+ // Normalize all components using the gather component scale.
+ return swizzle - VK_COMPONENT_SWIZZLE_R;  // Offset from R; yields 0..3 for R, G, B, A given their consecutive enum values.
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ case VK_COMPONENT_SWIZZLE_ONE:
+ // These cases are handled later.
+ return 0;  // Placeholder index only; per the comment above, the constant result is produced elsewhere.
+ }
+
+ return 0;  // Not reached: every switch path above returns.
+}
+
} // namespace sw
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index 591ef07..27e89bb 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -94,9 +94,6 @@
void address(const Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Int4 &offset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
Int4 computeLayerIndex(const Float4 &a, Pointer<Byte> &mipmap, SamplerFunction function);
Int4 computeFilterOffset(Float &lod);
-
- void convertSigned15(Float4 &cf, Short4 &ci);
- void convertUnsigned16(Float4 &cf, Short4 &ci);
void sRGBtoLinearFF00(Short4 &c);
bool hasFloatTexture() const;
@@ -111,6 +108,8 @@
bool isRGBComponent(int component) const;
bool borderModeActive() const;
VkComponentSwizzle gatherSwizzle() const;
+ sw::float4 getComponentScale() const;
+ int getGatherComponent() const;
Pointer<Byte> &constants;
const Sampler &state;