Optimize clearing of depth and stencil images
Extend Blitter::fastClear(), which fills the image with memset()-style
writes rather than Reactor routines, to also handle the D32_SFLOAT and
S8_UINT formats.
Benchmark results:
Run on (48 X 2594 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x24)
L1 Instruction 32 KiB (x24)
L2 Unified 256 KiB (x24)
L3 Unified 30720 KiB (x2)
--------------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------------
(LLVM, before)
ClearImage/VK_FORMAT_D32_SFLOAT 3.74 ms 0.016 ms 1000
(LLVM, after)
ClearImage/VK_FORMAT_D32_SFLOAT 1.08 ms 0.044 ms 10000
(Subzero, before)
ClearImage/VK_FORMAT_D32_SFLOAT 4.51 ms 0.063 ms 1000
(Subzero, after)
ClearImage/VK_FORMAT_D32_SFLOAT 0.963 ms 0.040 ms 7467
Bug: b/159455503
Change-Id: Ib1b0ad38417c516267d6addd835076a933e22dba
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/45888
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 5794e52..4b20339 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -154,20 +154,22 @@
}
}
-bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
+bool Blitter::fastClear(void *clearValue, vk::Format clearFormat, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
{
- if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
+ if(clearFormat != VK_FORMAT_R32G32B32A32_SFLOAT &&
+ clearFormat != VK_FORMAT_D32_SFLOAT &&
+ clearFormat != VK_FORMAT_S8_UINT)
{
return false;
}
- float *color = (float *)pixel;
+ float *color = reinterpret_cast<float *>(clearValue);
float r = color[0];
float g = color[1];
float b = color[2];
float a = color[3];
- uint32_t packed;
+ uint32_t packed = 0;
VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
switch(viewFormat)
@@ -202,6 +204,14 @@
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
packed = RGB9E5(color);
break;
+ case VK_FORMAT_D32_SFLOAT:
+ ASSERT(clearFormat == VK_FORMAT_D32_SFLOAT);
+ packed = *reinterpret_cast<uint32_t *>(clearValue); // float reinterpreted as uint32
+ break;
+ case VK_FORMAT_S8_UINT:
+ ASSERT(clearFormat == VK_FORMAT_S8_UINT);
+ packed = *reinterpret_cast<uint8_t *>(clearValue);
+ break;
default:
return false;
}
@@ -249,6 +259,14 @@
switch(viewFormat.bytes())
{
+ case 4:
+ for(uint32_t i = 0; i < area.extent.height; i++)
+ {
+ ASSERT(d < dest->end());
+ sw::clear((uint32_t *)d, packed, area.extent.width);
+ d += rowPitchBytes;
+ }
+ break;
case 2:
for(uint32_t i = 0; i < area.extent.height; i++)
{
@@ -257,11 +275,11 @@
d += rowPitchBytes;
}
break;
- case 4:
+ case 1:
for(uint32_t i = 0; i < area.extent.height; i++)
{
ASSERT(d < dest->end());
- sw::clear((uint32_t *)d, packed, area.extent.width);
+ memset(d, packed, area.extent.width);
d += rowPitchBytes;
}
break;