Fix component normalization for texture gather. Also refactor the existing normalization code so that the per-component scale computation is shared and reused. Bug: b/205576016 Tests: dEQP-VK.pipeline.sampler.border_swizzle.*pack* Change-Id: I02d965aa2be9cf349febca2f4a12a9a4456592d5 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/60048 Kokoro-Result: kokoro <noreply+kokoro@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com> Commit-Queue: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp index ab00880..80c467a 100644 --- a/src/Pipeline/SamplerCore.cpp +++ b/src/Pipeline/SamplerCore.cpp
@@ -142,6 +142,9 @@ bool use32BitFiltering = hasFloatTexture() || hasUnnormalizedIntegerTexture() || force32BitFiltering || state.isCube() || state.unnormalizedCoordinates || state.compareEnable || borderModeActive() || (function == Gather) || (function == Fetch); + const sw::float4 compScale = getComponentScale(); + int gatherComponent = (function == Gather) ? getGatherComponent() : 0; + int numComponents = (function == Gather) ? 4 : textureComponentCount(); if(use32BitFiltering) { @@ -149,76 +152,9 @@ if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable) { - switch(state.textureFormat) + for(int component = 0; component < numComponents; component++) { - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_B5G6R5_UNORM_PACK16: - c.x *= Float4(1.0f / 0xF800); - c.y *= Float4(1.0f / 0xFC00); - c.z *= Float4(1.0f / 0xF800); - break; - case VK_FORMAT_R4G4B4A4_UNORM_PACK16: - case VK_FORMAT_B4G4R4A4_UNORM_PACK16: - case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT: - case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT: - c.x *= Float4(1.0f / 0xF000); - c.y *= Float4(1.0f / 0xF000); - c.z *= Float4(1.0f / 0xF000); - c.w *= Float4(1.0f / 0xF000); - break; - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - case VK_FORMAT_B5G5R5A1_UNORM_PACK16: - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - c.x *= Float4(1.0f / 0xF800); - c.y *= Float4(1.0f / 0xF800); - c.z *= Float4(1.0f / 0xF800); - c.w *= Float4(1.0f / 0x8000); - break; - case VK_FORMAT_R8_SNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_R8G8B8A8_SNORM: - case VK_FORMAT_A8B8G8R8_SNORM_PACK32: - c.x = Max(c.x * Float4(1.0f / 0x7F00), Float4(-1.0f)); - c.y = Max(c.y * Float4(1.0f / 0x7F00), Float4(-1.0f)); - c.z = Max(c.z * Float4(1.0f / 0x7F00), Float4(-1.0f)); - c.w = Max(c.w * Float4(1.0f / 0x7F00), Float4(-1.0f)); - break; - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_B8G8R8A8_SRGB: - 
case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_R8_SRGB: - case VK_FORMAT_R8G8_SRGB: - c.x *= Float4(1.0f / 0xFF00u); - c.y *= Float4(1.0f / 0xFF00u); - c.z *= Float4(1.0f / 0xFF00u); - c.w *= Float4(1.0f / 0xFF00u); - break; - //TODO(b/205576016) - case VK_FORMAT_R16_UNORM: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16B16A16_UNORM: - c.x *= Float4(1.0f / 0xFFFF); - c.y *= Float4(1.0f / 0xFFFF); - c.z *= Float4(1.0f / 0xFFFF); - c.w *= Float4(1.0f / 0xFFFF); - break; - case VK_FORMAT_R16_SNORM: - case VK_FORMAT_R16G16_SNORM: - case VK_FORMAT_R16G16B16A16_SNORM: - c.x = Max(c.x * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - c.y = Max(c.y * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - c.z = Max(c.z * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - c.w = Max(c.w * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - break; - default: - for(int component = 0; component < textureComponentCount(); component++) - { - c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF); - } + c[component] *= Float4(1.0f / compScale[(function == Gather) ? 
gatherComponent : component]); } } } @@ -226,74 +162,26 @@ { Vector4s cs = sampleFilter(texture, u, v, w, a, offset, sample, lod, anisotropy, uDelta, vDelta, function); - switch(state.textureFormat) + for(int component = 0; component < numComponents; component++) { - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_B5G6R5_UNORM_PACK16: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); - break; - case VK_FORMAT_R4G4B4A4_UNORM_PACK16: - case VK_FORMAT_B4G4R4A4_UNORM_PACK16: - case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT: - case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000); - c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000); - break; - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - case VK_FORMAT_B5G5R5A1_UNORM_PACK16: - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); - c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0x8000); - break; - case VK_FORMAT_R8_SNORM: - case VK_FORMAT_R8G8_SNORM: - case VK_FORMAT_R8G8B8A8_SNORM: - case VK_FORMAT_A8B8G8R8_SNORM_PACK32: - c.x = Max(Float4(cs.x) * Float4(1.0f / 0x7F00), Float4(-1.0f)); - c.y = Max(Float4(cs.y) * Float4(1.0f / 0x7F00), Float4(-1.0f)); - c.z = Max(Float4(cs.z) * Float4(1.0f / 0x7F00), Float4(-1.0f)); - c.w = Max(Float4(cs.w) * Float4(1.0f / 0x7F00), Float4(-1.0f)); - break; - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_A8B8G8R8_UNORM_PACK32: - case VK_FORMAT_B8G8R8A8_SRGB: - case VK_FORMAT_R8G8B8A8_SRGB: - case VK_FORMAT_R8_SRGB: - case VK_FORMAT_R8G8_SRGB: - c.x = 
Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xFF00u); - c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFF00u); - c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xFF00u); - c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xFF00u); - break; - case VK_FORMAT_R16_SNORM: - case VK_FORMAT_R16G16_SNORM: - case VK_FORMAT_R16G16B16A16_SNORM: - c.x = Max(Float4(cs.x) * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - c.y = Max(Float4(cs.y) * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - c.z = Max(Float4(cs.z) * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - c.w = Max(Float4(cs.w) * Float4(1.0f / 0x7FFF), Float4(-1.0f)); - break; - default: - for(int component = 0; component < textureComponentCount(); component++) + if(hasUnsignedTextureComponent(component)) { - if(hasUnsignedTextureComponent(component)) - { - convertUnsigned16(c[component], cs[component]); - } - else - { - convertSigned15(c[component], cs[component]); - } + c[component] = Float4(As<UShort4>(cs[component])); } + else + { + c[component] = Float4(cs[component]); + } + + c[component] *= Float4(1.0f / compScale[(function == Gather) ? gatherComponent : component]); + } + } + + if(state.textureFormat.isSignedNormalized()) + { + for(int component = 0; component < numComponents; component++) + { + c[component] = Max(c[component], Float4(-1.0f)); } } @@ -2179,19 +2067,7 @@ Vector4i border; const bool scaled = !hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable; - const sw::float4 scale = state.textureFormat.getScale(); - const sw::int4 bits = state.textureFormat.bitsPerComponent(); - const sw::int4 shift = sw::int4(std::max(16 - bits.x, 0), std::max(16 - bits.y, 0), std::max(16 - bits.z, 0), std::max(16 - bits.w, 0)); - sw::float4 scaleComp = scaled ? 
sw::float4(static_cast<uint16_t>(scale.x) << shift.x, static_cast<uint16_t>(scale.y) << shift.y, - static_cast<uint16_t>(scale.z) << shift.z, static_cast<uint16_t>(scale.w) << shift.w) - : sw::float4(1.0, 1.0, 1.0, 1.0); - // TODO(b/204709464): Unlike other formats, the fixed point presentation of the formats below are handled with bit extension. - // This special handling of such formats should be removed later. - const VkFormat format = static_cast<VkFormat>(state.textureFormat); - if(format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 || format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) - scaleComp = sw::float4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); - else if(format == VK_FORMAT_A2B10G10R10_SNORM_PACK32 || format == VK_FORMAT_A2R10G10B10_SNORM_PACK32) - scaleComp = sw::float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF); + const sw::float4 scaleComp = scaled ? getComponentScale() : sw::float4(1.0f, 1.0f, 1.0f, 1.0f); switch(state.border) { @@ -2608,16 +2484,6 @@ } } -void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs) -{ - cf = Float4(cs) * Float4(1.0f / 0x7FFF); -} - -void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs) -{ - cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF); -} - void SamplerCore::sRGBtoLinearFF00(Short4 &c) { c = As<UShort4>(c) >> 8; @@ -2701,4 +2567,56 @@ } } +sw::float4 SamplerCore::getComponentScale() const +{ + // TODO(b/204709464): Unlike other formats, the fixed point presentation of the formats below are handled with bit extension. + // This special handling of such formats should be removed later. 
+ const VkFormat format = static_cast<VkFormat>(state.textureFormat); + switch(format) + { + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + case VK_FORMAT_A2R10G10B10_UNORM_PACK32: + return sw::float4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + case VK_FORMAT_A2B10G10R10_SNORM_PACK32: + case VK_FORMAT_A2R10G10B10_SNORM_PACK32: + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + return sw::float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF); + default: + break; + }; + + const sw::float4 scale = state.textureFormat.getScale(); + const sw::int4 bits = state.textureFormat.bitsPerComponent(); + const sw::int4 shift = sw::int4(std::max(16 - bits.x, 0), std::max(16 - bits.y, 0), std::max(16 - bits.z, 0), + std::max(16 - bits.w, 0)); + + return sw::float4(static_cast<uint16_t>(scale.x) << shift.x, + static_cast<uint16_t>(scale.y) << shift.y, + static_cast<uint16_t>(scale.z) << shift.z, + static_cast<uint16_t>(scale.w) << shift.w); +} + +int SamplerCore::getGatherComponent() const +{ + VkComponentSwizzle swizzle = gatherSwizzle(); + + switch(swizzle) + { + default: UNSUPPORTED("VkComponentSwizzle %d", (int)swizzle); return 0; + case VK_COMPONENT_SWIZZLE_R: + case VK_COMPONENT_SWIZZLE_G: + case VK_COMPONENT_SWIZZLE_B: + case VK_COMPONENT_SWIZZLE_A: + // Normalize all components using the gather component scale. + return swizzle - VK_COMPONENT_SWIZZLE_R; + case VK_COMPONENT_SWIZZLE_ZERO: + case VK_COMPONENT_SWIZZLE_ONE: + // These cases are handled later. + return 0; + } + + return 0; +} + } // namespace sw
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp index 591ef07..27e89bb 100644 --- a/src/Pipeline/SamplerCore.hpp +++ b/src/Pipeline/SamplerCore.hpp
@@ -94,9 +94,6 @@ void address(const Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Int4 &offset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function); Int4 computeLayerIndex(const Float4 &a, Pointer<Byte> &mipmap, SamplerFunction function); Int4 computeFilterOffset(Float &lod); - - void convertSigned15(Float4 &cf, Short4 &ci); - void convertUnsigned16(Float4 &cf, Short4 &ci); void sRGBtoLinearFF00(Short4 &c); bool hasFloatTexture() const; @@ -111,6 +108,8 @@ bool isRGBComponent(int component) const; bool borderModeActive() const; VkComponentSwizzle gatherSwizzle() const; + sw::float4 getComponentScale() const; + int getGatherComponent() const; Pointer<Byte> &constants; const Sampler &state;