Make the B10G11R11 and E5B9G9R9 formats fast clearable
Shamelessly copied the FP32 -> B10G11R11 and FP32 -> E5B9G9R9
conversion functions from ANGLE. This allows packing the clear
color for these formats into a single 32-bit integer, which can
then be used within the fast clear function.
Tested using:
api.image_clearing.core.clear_color_image.*.b10g11r11_ufloat_pack32
api.image_clearing.core.clear_color_image.*.e5b9g9r9_ufloat_pack32
Bug b/119620767
Change-Id: Ic268da62959582f084245e88181374213734760f
Reviewed-on: https://swiftshader-review.googlesource.com/c/23650
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Corentin Wallez <cwallez@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 7340d6d..d8ee246 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -89,6 +89,14 @@
((uint32_t)(255 * g + 0.5f) << 8) |
((uint32_t)(255 * b + 0.5f) << 0);
break;
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ if((rgbaMask & 0x7) != 0x7) return false;
+ packed = R11G11B10F(color);
+ break;
+ case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ if((rgbaMask & 0x7) != 0x7) return false;
+ packed = RGB9E5(color);
+ break;
default:
return false;
}
diff --git a/src/System/Half.hpp b/src/System/Half.hpp
index f2d378e..5c23dca 100644
--- a/src/System/Half.hpp
+++ b/src/System/Half.hpp
@@ -15,6 +15,8 @@
#ifndef sw_Half_hpp
#define sw_Half_hpp
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
namespace sw
{
class half
@@ -53,6 +55,48 @@
unsigned int E : 5;
public:
+		// Packs three FP32 channels (rgb[0] = red, rgb[1] = green, rgb[2] = blue)
+		// into the shared-exponent representation: one 9-bit mantissa per channel
+		// (R, G, B) and a single 5-bit biased exponent (E) common to all three.
+		// Each channel is first clamped to [0, g_sharedexp_max].
+		RGB9E5(float rgb[3])
+		{
+			// B is the exponent bias (15)
+			constexpr int g_sharedexp_bias = 15;
+
+			// N is the number of mantissa bits per component (9)
+			constexpr int g_sharedexp_mantissabits = 9;
+
+			// Emax is the maximum allowed biased exponent value (31)
+			constexpr int g_sharedexp_maxexponent = 31;
+
+			// Largest representable value: (2^N - 1) / 2^N * 2^(Emax - B)
+			constexpr float g_sharedexp_max =
+				((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
+				 static_cast<float>(1 << g_sharedexp_mantissabits)) *
+				static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));
+
+			const float red_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[0]));
+			const float green_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[1]));
+			const float blue_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[2]));
+
+			const float max_c = std::max<float>(std::max<float>(red_c, green_c), blue_c);
+
+			// Preliminary shared exponent. The logarithm has to be base 2: with the
+			// natural logarithm the exponent is underestimated by more than the
+			// single-step correction below can absorb (e.g. for max_c == 16), which
+			// makes the mantissas overflow their 9 bits.
+			const float exp_p =
+				std::max<float>(-g_sharedexp_bias - 1, std::floor(std::log2(max_c))) + 1 + g_sharedexp_bias;
+
+			// Mantissa the largest channel would get with the preliminary exponent.
+			const int max_s = static_cast<int>(std::floor(
+				(max_c / std::pow(2.0, static_cast<double>(exp_p) - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5));
+
+			// If rounding pushed that mantissa to 2^N, bump the exponent by one.
+			const int exp_s =
+				static_cast<int>((max_s < (1 << g_sharedexp_mantissabits)) ? exp_p : exp_p + 1);
+
+			// Every channel is divided by the same power of two, then rounded to nearest.
+			const double scale = std::pow(2.0, exp_s - g_sharedexp_bias - g_sharedexp_mantissabits);
+
+			R = static_cast<unsigned int>(std::floor((red_c / scale) + 0.5));
+			G = static_cast<unsigned int>(std::floor((green_c / scale) + 0.5));
+			B = static_cast<unsigned int>(std::floor((blue_c / scale) + 0.5));
+			E = exp_s;
+		}
+
+		// Returns the storage of this object read back as a single unsigned int,
+		// i.e. the packed R/G/B/E bit-fields as one value.
+		operator unsigned int() const
+		{
+			const unsigned int *packedBits = reinterpret_cast<const unsigned int*>(this);
+			return *packedBits;
+		}
+
void toRGB16F(half rgb[3]) const
{
constexpr int offset = 24; // Exponent bias (15) + number of mantissa bits per component (9) = 24
@@ -80,7 +124,165 @@
return shortAsHalf(fp10 << 5); // Sign bit 0
}
+		// Converts an FP32 value to an unsigned 11-bit float (5 exponent bits in
+		// 0x7C0, 6 mantissa bits in 0x3F, no sign bit).
+		//  - NaN stays NaN (all exponent bits set, mantissa bits folded from the input).
+		//  - +INF maps to the float11 infinity, -INF and every negative value map to 0.
+		//  - Values above the largest finite float11 are clamped to that maximum.
+		//  - Everything else is rounded to nearest even; values below the smallest
+		//    normalized float11 become denormals or 0.
+		inline unsigned short float32ToFloat11(float fp32)
+		{
+			const unsigned int float32MantissaMask = 0x7FFFFF;
+			const unsigned int float32ExponentMask = 0x7F800000;
+			const unsigned int float32SignMask = 0x80000000;
+			const unsigned int float32ValueMask = ~float32SignMask;
+			const unsigned int float32ExponentFirstBit = 23;
+			const unsigned int float32ExponentBias = 127;
+
+			const unsigned short float11Max = 0x7BF;
+			const unsigned short float11MantissaMask = 0x3F;
+			const unsigned short float11ExponentMask = 0x7C0;
+			const unsigned short float11BitMask = 0x7FF;
+			const unsigned int float11ExponentBias = 14;
+
+			const unsigned int float32Maxfloat11 = 0x477E0000;
+			const unsigned int float32Minfloat11 = 0x38800000;
+
+			// Read the bit pattern through memcpy; dereferencing a float through an
+			// unsigned int pointer violates the aliasing rules.
+			static_assert(sizeof(unsigned int) == sizeof(float), "float bit pattern must fit an unsigned int");
+			unsigned int float32Bits = 0;
+			std::memcpy(&float32Bits, &fp32, sizeof(float32Bits));
+			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
+
+			unsigned int float32Val = float32Bits & float32ValueMask;
+
+			if((float32Val & float32ExponentMask) == float32ExponentMask)
+			{
+				// INF or NAN
+				if((float32Val & float32MantissaMask) != 0)
+				{
+					// NaN: fold the mantissa so that the float11 mantissa is non-zero
+					return float11ExponentMask |
+					       (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
+					        float11MantissaMask);
+				}
+				else if(float32Sign)
+				{
+					// -INF is clamped to 0 since float11 is positive only
+					return 0;
+				}
+				else
+				{
+					return float11ExponentMask;
+				}
+			}
+			else if(float32Sign)
+			{
+				// float11 is positive only, so clamp to zero
+				return 0;
+			}
+			else if(float32Val > float32Maxfloat11)
+			{
+				// The number is too large to be represented as a float11, set to max
+				return float11Max;
+			}
+			else
+			{
+				if(float32Val < float32Minfloat11)
+				{
+					// The number is too small to be represented as a normalized float11
+					// Convert it to a denormalized value.
+					const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
+					                           (float32Val >> float32ExponentFirstBit);
+					const unsigned int significand =
+						(1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask);
+
+					// The significand is 24 bits wide, so any shift above 23 leaves
+					// nothing. Shifting a 32-bit value by 32 or more is undefined,
+					// and shift can be as large as 113 here, so handle it explicitly.
+					float32Val = (shift <= float32ExponentFirstBit) ? (significand >> shift) : 0;
+				}
+				else
+				{
+					// Rebias the exponent to represent the value as a normalized float11
+					float32Val += 0xC8000000;
+				}
+
+				// Drop the 17 low mantissa bits, rounding to nearest even
+				return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
+			}
+		}
+
+		// Converts an FP32 value to an unsigned 10-bit float (5 exponent bits in
+		// 0x3E0, 5 mantissa bits in 0x1F, no sign bit).
+		//  - NaN stays NaN (all exponent bits set, mantissa bits folded from the input).
+		//  - +INF maps to the float10 infinity, -INF and every negative value map to 0.
+		//  - Values above the largest finite float10 are clamped to that maximum.
+		//  - Everything else is rounded to nearest even; values below the smallest
+		//    normalized float10 become denormals or 0.
+		inline unsigned short float32ToFloat10(float fp32)
+		{
+			const unsigned int float32MantissaMask = 0x7FFFFF;
+			const unsigned int float32ExponentMask = 0x7F800000;
+			const unsigned int float32SignMask = 0x80000000;
+			const unsigned int float32ValueMask = ~float32SignMask;
+			const unsigned int float32ExponentFirstBit = 23;
+			const unsigned int float32ExponentBias = 127;
+
+			const unsigned short float10Max = 0x3DF;
+			const unsigned short float10MantissaMask = 0x1F;
+			const unsigned short float10ExponentMask = 0x3E0;
+			const unsigned short float10BitMask = 0x3FF;
+			const unsigned int float10ExponentBias = 14;
+
+			const unsigned int float32Maxfloat10 = 0x477C0000;
+			const unsigned int float32Minfloat10 = 0x38800000;
+
+			// Read the bit pattern through memcpy; dereferencing a float through an
+			// unsigned int pointer violates the aliasing rules.
+			static_assert(sizeof(unsigned int) == sizeof(float), "float bit pattern must fit an unsigned int");
+			unsigned int float32Bits = 0;
+			std::memcpy(&float32Bits, &fp32, sizeof(float32Bits));
+			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
+
+			unsigned int float32Val = float32Bits & float32ValueMask;
+
+			if((float32Val & float32ExponentMask) == float32ExponentMask)
+			{
+				// INF or NAN
+				if((float32Val & float32MantissaMask) != 0)
+				{
+					// NaN: fold the mantissa so that the float10 mantissa is non-zero
+					return float10ExponentMask |
+					       (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
+					        float10MantissaMask);
+				}
+				else if(float32Sign)
+				{
+					// -INF is clamped to 0 since float10 is positive only
+					return 0;
+				}
+				else
+				{
+					return float10ExponentMask;
+				}
+			}
+			else if(float32Sign)
+			{
+				// float10 is positive only, so clamp to zero
+				return 0;
+			}
+			else if(float32Val > float32Maxfloat10)
+			{
+				// The number is too large to be represented as a float10, set to max
+				return float10Max;
+			}
+			else
+			{
+				if(float32Val < float32Minfloat10)
+				{
+					// The number is too small to be represented as a normalized float10
+					// Convert it to a denormalized value.
+					const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
+					                           (float32Val >> float32ExponentFirstBit);
+					const unsigned int significand =
+						(1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask);
+
+					// The significand is 24 bits wide, so any shift above 23 leaves
+					// nothing. Shifting a 32-bit value by 32 or more is undefined,
+					// and shift can be as large as 113 here, so handle it explicitly.
+					float32Val = (shift <= float32ExponentFirstBit) ? (significand >> shift) : 0;
+				}
+				else
+				{
+					// Rebias the exponent to represent the value as a normalized float10
+					float32Val += 0xC8000000;
+				}
+
+				// Drop the 18 low mantissa bits, rounding to nearest even
+				return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
+			}
+		}
+
public:
+		// Builds the packed value from three FP32 channels: rgb[0] and rgb[1] are
+		// converted with float32ToFloat11, rgb[2] with float32ToFloat10.
+		R11G11B10F(float rgb[3])
+		{
+			const unsigned short red11 = float32ToFloat11(rgb[0]);
+			const unsigned short green11 = float32ToFloat11(rgb[1]);
+			const unsigned short blue10 = float32ToFloat10(rgb[2]);
+
+			R = red11;
+			G = green11;
+			B = blue10;
+		}
+
+		// Returns the storage of this object read back as a single unsigned int,
+		// i.e. the packed R/G/B fields as one value.
+		operator unsigned int() const
+		{
+			const unsigned int &packedBits = *reinterpret_cast<const unsigned int*>(this);
+			return packedBits;
+		}
+
void toRGB16F(half rgb[3]) const
{
rgb[0] = float11ToFloat16(R);