Use half-precision for special internal formats.

This change uses half-precision floating-point implementation formats
for all OpenGL half-precision floating-point formats, as well as the
R11F_G11F_B10F and RGB9_E5 special internal formats.

sw::FORMAT_X16B16G16R16F was implemented for the formats without alpha.

RGB9E5 conversion was optimized to not require powf(2.0, x), and 11- and
10-bit floating-point formats were optimized to map directly to 16-bit
half-precision floating-point.

Change-Id: Ic33f903d01f37394244aec9f53b0e67d1c978764
Reviewed-on: https://swiftshader-review.googlesource.com/15410
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Common/Half.hpp b/src/Common/Half.hpp index f62ea27..f2d378e 100644 --- a/src/Common/Half.hpp +++ b/src/Common/Half.hpp
@@ -20,6 +20,7 @@ class half { public: + half() = default; explicit half(float f); operator float() const; @@ -30,6 +31,63 @@ private: unsigned short fp16i; }; + + inline half shortAsHalf(short s) + { + union + { + half h; + short s; + } hs; + + hs.s = s; + + return hs.h; + } + + class RGB9E5 + { + unsigned int R : 9; + unsigned int G : 9; + unsigned int B : 9; + unsigned int E : 5; + + public: + void toRGB16F(half rgb[3]) const + { + constexpr int offset = 24; // Exponent bias (15) + number of mantissa bits per component (9) = 24 + + const float factor = (1u << E) * (1.0f / (1 << offset)); + rgb[0] = half(R * factor); + rgb[1] = half(G * factor); + rgb[2] = half(B * factor); + } + }; + + class R11G11B10F + { + unsigned int R : 11; + unsigned int G : 11; + unsigned int B : 10; + + static inline half float11ToFloat16(unsigned short fp11) + { + return shortAsHalf(fp11 << 4); // Sign bit 0 + } + + static inline half float10ToFloat16(unsigned short fp10) + { + return shortAsHalf(fp10 << 5); // Sign bit 0 + } + + public: + void toRGB16F(half rgb[3]) const + { + rgb[0] = float11ToFloat16(R); + rgb[1] = float11ToFloat16(G); + rgb[2] = float10ToFloat16(B); + } + }; } #endif // sw_Half_hpp
diff --git a/src/Common/Math.hpp b/src/Common/Math.hpp index eb734e0..bae3c12 100644 --- a/src/Common/Math.hpp +++ b/src/Common/Math.hpp
@@ -16,6 +16,7 @@ #define sw_Math_hpp #include "Types.hpp" +#include "Half.hpp" #include <cmath> #if defined(_MSC_VER) @@ -360,122 +361,6 @@ { return static_cast<int>(min(x, 0x7FFFFFFFu)); } - - class RGB9E5Data - { - unsigned int R : 9; - unsigned int G : 9; - unsigned int B : 9; - unsigned int E : 5; - - public: - void toRGBFloats(float* rgb) const - { - static const float Offset = -24.0f; // Exponent Bias (15) + Number of mantissa bits per component (9) = 24 - - const float factor = powf(2.0f, static_cast<float>(E) + Offset); - rgb[0] = static_cast<float>(R) * factor; - rgb[1] = static_cast<float>(G) * factor; - rgb[2] = static_cast<float>(B) * factor; - } - }; - - class R11G11B10FData - { - unsigned int R : 11; - unsigned int G : 11; - unsigned int B : 10; - - static inline float float11ToFloat32(unsigned short fp11) - { - unsigned short exponent = (fp11 >> 6) & 0x1F; - unsigned short mantissa = fp11 & 0x3F; - - unsigned int output; - if(exponent == 0x1F) - { - // INF or NAN - output = 0x7f800000 | (mantissa << 17); - } - else - { - if(exponent != 0) - { - // normalized - } - else if(mantissa != 0) - { - // The value is denormalized - exponent = 1; - - do - { - exponent--; - mantissa <<= 1; - } while((mantissa & 0x40) == 0); - - mantissa = mantissa & 0x3F; - } - else // The value is zero - { - exponent = static_cast<unsigned short>(-112); - } - - output = ((exponent + 112) << 23) | (mantissa << 17); - } - - return *(float*)(&output); - } - - static inline float float10ToFloat32(unsigned short fp10) - { - unsigned short exponent = (fp10 >> 5) & 0x1F; - unsigned short mantissa = fp10 & 0x1F; - - unsigned int output; - if(exponent == 0x1F) - { - // INF or NAN - output = 0x7f800000 | (mantissa << 17); - } - else - { - if(exponent != 0) - { - // normalized - } - else if(mantissa != 0) - { - // The value is denormalized - exponent = 1; - - do - { - exponent--; - mantissa <<= 1; - } while((mantissa & 0x20) == 0); - - mantissa = mantissa & 0x1F; - } - else // The value 
is zero - { - exponent = static_cast<unsigned short>(-112); - } - - output = ((exponent + 112) << 23) | (mantissa << 18); - } - - return *(float*)(&output); - } - - public: - void toRGBFloats(float* rgb) const - { - rgb[0] = float11ToFloat32(R); - rgb[1] = float11ToFloat32(G); - rgb[2] = float10ToFloat32(B); - } - }; } #endif // sw_Math_hpp
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp index ff957a9..6d63b47 100644 --- a/src/OpenGL/common/Image.cpp +++ b/src/OpenGL/common/Image.cpp
@@ -272,12 +272,12 @@ template<> void LoadImageRow<R11G11B10F>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width) { - const sw::R11G11B10FData *sourceRGB = reinterpret_cast<const sw::R11G11B10FData*>(source); - float *destF = reinterpret_cast<float*>(dest + xoffset * 16); + const sw::R11G11B10F *sourceRGB = reinterpret_cast<const sw::R11G11B10F*>(source); + sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8); for(int x = 0; x < width; x++, sourceRGB++, destF+=4) { - sourceRGB->toRGBFloats(destF); + sourceRGB->toRGB16F(destF); destF[3] = 1.0f; } } @@ -285,12 +285,12 @@ template<> void LoadImageRow<RGB9E5>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width) { - const sw::RGB9E5Data *sourceRGB = reinterpret_cast<const sw::RGB9E5Data*>(source); - float *destF = reinterpret_cast<float*>(dest + xoffset * 16); + const sw::RGB9E5 *sourceRGB = reinterpret_cast<const sw::RGB9E5*>(source); + sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8); for(int x = 0; x < width; x++, sourceRGB++, destF += 4) { - sourceRGB->toRGBFloats(destF); + sourceRGB->toRGB16F(destF); destF[3] = 1.0f; } } @@ -665,9 +665,10 @@ return sw::FORMAT_G32R32F; case GL_RGB: case GL_RGB32F: + return sw::FORMAT_X32B32G32R32F; case GL_R11F_G11F_B10F: case GL_RGB9_E5: - return sw::FORMAT_X32B32G32R32F; + return sw::FORMAT_X16B16G16R16F; case GL_RGBA: case GL_RGBA32F: return sw::FORMAT_A32B32G32R32F; @@ -676,6 +677,7 @@ case GL_RG16F: return sw::FORMAT_G16R16F; case GL_RGB16F: + return sw::FORMAT_X16B16G16R16F; case GL_RGBA16F: return sw::FORMAT_A16B16G16R16F; case GL_DEPTH_COMPONENT: @@ -703,14 +705,14 @@ case GL_RG: case GL_RG16F: return sw::FORMAT_G16R16F; - case GL_RGB: - case GL_RGB16F: case GL_RGBA: case GL_RGBA16F: return sw::FORMAT_A16B16G16R16F; + case GL_RGB: + case GL_RGB16F: case GL_R11F_G11F_B10F: case GL_RGB9_E5: - return sw::FORMAT_X32B32G32R32F; + return sw::FORMAT_X16B16G16R16F; default: UNREACHABLE(format); } 
@@ -914,7 +916,7 @@ } case GL_UNSIGNED_INT_10F_11F_11F_REV: case GL_UNSIGNED_INT_5_9_9_9_REV: // 5 is the exponent field, not alpha. - return sw::FORMAT_X32B32G32R32F; + return sw::FORMAT_X16B16G16R16F; default: UNREACHABLE(type); }
diff --git a/src/OpenGL/libGLESv2/Context.cpp b/src/OpenGL/libGLESv2/Context.cpp index 76058d3..ea52c9f 100644 --- a/src/OpenGL/libGLESv2/Context.cpp +++ b/src/OpenGL/libGLESv2/Context.cpp
@@ -3314,7 +3314,7 @@ egl::Image *renderTarget = nullptr; switch(format) { - case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT: // GL_NV_read_depth renderTarget = framebuffer->getDepthBuffer(); break; default:
diff --git a/src/Renderer/Blitter.hpp b/src/Renderer/Blitter.hpp index a132a7e..c6eb726 100644 --- a/src/Renderer/Blitter.hpp +++ b/src/Renderer/Blitter.hpp
@@ -27,7 +27,7 @@ { struct Options { - Options() {} + Options() = default; Options(bool filter, bool useStencil, bool convertSRGB) : writeMask(0xF), clearOperation(false), filter(filter), useStencil(useStencil), convertSRGB(convertSRGB) {} Options(unsigned int writeMask) @@ -54,7 +54,7 @@ struct State : Options { - State() {} + State() = default; State(const Options &options) : Options(options) {} bool operator==(const State &state) const
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp index dbde7e3..aa2038e 100644 --- a/src/Renderer/Renderer.cpp +++ b/src/Renderer/Renderer.cpp
@@ -281,7 +281,7 @@ setupPrimitives = &Renderer::setupPoints; } - DrawCall *draw = 0; + DrawCall *draw = nullptr; do {
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp index 03d37f2..4a4d543 100644 --- a/src/Renderer/Surface.cpp +++ b/src/Renderer/Surface.cpp
@@ -304,6 +304,8 @@ ((half*)element)[0] = (half)r; ((half*)element)[1] = (half)g; break; + case FORMAT_X16B16G16R16F: + ((half*)element)[3] = 1.0f; case FORMAT_B16G16R16F: ((half*)element)[0] = (half)r; ((half*)element)[1] = (half)g; @@ -962,6 +964,7 @@ r = ((half*)element)[0]; g = ((half*)element)[1]; break; + case FORMAT_X16B16G16R16F: case FORMAT_B16G16R16F: r = ((half*)element)[0]; g = ((half*)element)[1]; @@ -1654,6 +1657,7 @@ case FORMAT_R16F: return 2; case FORMAT_G16R16F: return 4; case FORMAT_B16G16R16F: return 6; + case FORMAT_X16B16G16R16F: return 8; case FORMAT_A16B16G16R16F: return 8; case FORMAT_A32F: return 4; case FORMAT_R32F: return 4; @@ -2891,6 +2895,7 @@ case FORMAT_R16F: case FORMAT_G16R16F: case FORMAT_B16G16R16F: + case FORMAT_X16B16G16R16F: case FORMAT_A16B16G16R16F: case FORMAT_R32F: case FORMAT_G32R32F: @@ -3947,6 +3952,7 @@ case FORMAT_R16F: return FORMAT_R32F; case FORMAT_G16R16F: return FORMAT_G32R32F; case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; + case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F; case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; case FORMAT_A32F: return FORMAT_A32B32G32R32F; case FORMAT_R32F: return FORMAT_R32F;
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp index 2ded418..7075434 100644 --- a/src/Renderer/Surface.hpp +++ b/src/Renderer/Surface.hpp
@@ -170,6 +170,7 @@ FORMAT_R16F, FORMAT_G16R16F, FORMAT_B16G16R16F, + FORMAT_X16B16G16R16F, FORMAT_A16B16G16R16F, FORMAT_A32F, FORMAT_R32F,