Use half-precision for special internal formats.
This change uses half-precision floating-point implementation formats
for all OpenGL half-precision floating-point formats, as well as the
R11F_G11F_B10F and RGB9_E5 special internal formats.
sw::FORMAT_X16B16G16R16F was implemented for the formats without alpha.
RGB9E5 conversion was optimized to not require powf(2.0, x), and 11-
and 10-bit floating-point formats were optimized to map directly to
16-bit half-precision floating-point.
Change-Id: Ic33f903d01f37394244aec9f53b0e67d1c978764
Reviewed-on: https://swiftshader-review.googlesource.com/15410
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Common/Half.hpp b/src/Common/Half.hpp
index f62ea27..f2d378e 100644
--- a/src/Common/Half.hpp
+++ b/src/Common/Half.hpp
@@ -20,6 +20,7 @@
class half
{
public:
+ half() = default;
explicit half(float f);
operator float() const;
@@ -30,6 +31,63 @@
private:
unsigned short fp16i;
};
+
+ inline half shortAsHalf(short s) // Returns a half whose 16 bits are the bits of 's'; no numeric conversion is performed.
+ {
+ union
+ {
+ half h;
+ short s;
+ } hs;
+ // NOTE(review): writes member 's' and then reads member 'h' (union type punning); relies on the compiler supporting this.
+ hs.s = s;
+
+ return hs.h;
+ }
+
+ class RGB9E5 // Bit-field view of a packed value: three 9-bit fields R, G, B and one 5-bit field E used to scale all three.
+ {
+ unsigned int R : 9;
+ unsigned int G : 9;
+ unsigned int B : 9;
+ unsigned int E : 5;
+
+ public:
+ void toRGB16F(half rgb[3]) const // Writes rgb[0..2] = {R, G, B} * 2^(E - 24), each converted to half; only three elements are written.
+ {
+ constexpr int offset = 24; // Exponent bias (15) + number of mantissa bits per component (9) = 24
+ // factor == 2^E / 2^offset, built from integer shifts so that no powf() call is needed.
+ const float factor = (1u << E) * (1.0f / (1 << offset));
+ rgb[0] = half(R * factor);
+ rgb[1] = half(G * factor);
+ rgb[2] = half(B * factor);
+ }
+ };
+
+ class R11G11B10F // Bit-field view of a packed value holding an 11-bit R, an 11-bit G and a 10-bit B field.
+ {
+ unsigned int R : 11;
+ unsigned int G : 11;
+ unsigned int B : 10;
+ // Widens an 11-bit value to a half bit pattern by shifting it up 4 bits; the 4 low bits become zero.
+ static inline half float11ToFloat16(unsigned short fp11)
+ {
+ return shortAsHalf(fp11 << 4); // Sign bit 0
+ }
+ // Widens a 10-bit value to a half bit pattern by shifting it up 5 bits; the 5 low bits become zero.
+ static inline half float10ToFloat16(unsigned short fp10)
+ {
+ return shortAsHalf(fp10 << 5); // Sign bit 0
+ }
+
+ public:
+ void toRGB16F(half rgb[3]) const // Writes R, G, B to rgb[0..2] as halves; only three elements are written.
+ {
+ rgb[0] = float11ToFloat16(R);
+ rgb[1] = float11ToFloat16(G);
+ rgb[2] = float10ToFloat16(B);
+ }
+ };
}
#endif // sw_Half_hpp
diff --git a/src/Common/Math.hpp b/src/Common/Math.hpp
index eb734e0..bae3c12 100644
--- a/src/Common/Math.hpp
+++ b/src/Common/Math.hpp
@@ -16,6 +16,7 @@
#define sw_Math_hpp
#include "Types.hpp"
+#include "Half.hpp"
#include <cmath>
#if defined(_MSC_VER)
@@ -360,122 +361,6 @@
{
return static_cast<int>(min(x, 0x7FFFFFFFu));
}
-
- class RGB9E5Data
- {
- unsigned int R : 9;
- unsigned int G : 9;
- unsigned int B : 9;
- unsigned int E : 5;
-
- public:
- void toRGBFloats(float* rgb) const
- {
- static const float Offset = -24.0f; // Exponent Bias (15) + Number of mantissa bits per component (9) = 24
-
- const float factor = powf(2.0f, static_cast<float>(E) + Offset);
- rgb[0] = static_cast<float>(R) * factor;
- rgb[1] = static_cast<float>(G) * factor;
- rgb[2] = static_cast<float>(B) * factor;
- }
- };
-
- class R11G11B10FData
- {
- unsigned int R : 11;
- unsigned int G : 11;
- unsigned int B : 10;
-
- static inline float float11ToFloat32(unsigned short fp11)
- {
- unsigned short exponent = (fp11 >> 6) & 0x1F;
- unsigned short mantissa = fp11 & 0x3F;
-
- unsigned int output;
- if(exponent == 0x1F)
- {
- // INF or NAN
- output = 0x7f800000 | (mantissa << 17);
- }
- else
- {
- if(exponent != 0)
- {
- // normalized
- }
- else if(mantissa != 0)
- {
- // The value is denormalized
- exponent = 1;
-
- do
- {
- exponent--;
- mantissa <<= 1;
- } while((mantissa & 0x40) == 0);
-
- mantissa = mantissa & 0x3F;
- }
- else // The value is zero
- {
- exponent = static_cast<unsigned short>(-112);
- }
-
- output = ((exponent + 112) << 23) | (mantissa << 17);
- }
-
- return *(float*)(&output);
- }
-
- static inline float float10ToFloat32(unsigned short fp10)
- {
- unsigned short exponent = (fp10 >> 5) & 0x1F;
- unsigned short mantissa = fp10 & 0x1F;
-
- unsigned int output;
- if(exponent == 0x1F)
- {
- // INF or NAN
- output = 0x7f800000 | (mantissa << 17);
- }
- else
- {
- if(exponent != 0)
- {
- // normalized
- }
- else if(mantissa != 0)
- {
- // The value is denormalized
- exponent = 1;
-
- do
- {
- exponent--;
- mantissa <<= 1;
- } while((mantissa & 0x20) == 0);
-
- mantissa = mantissa & 0x1F;
- }
- else // The value is zero
- {
- exponent = static_cast<unsigned short>(-112);
- }
-
- output = ((exponent + 112) << 23) | (mantissa << 18);
- }
-
- return *(float*)(&output);
- }
-
- public:
- void toRGBFloats(float* rgb) const
- {
- rgb[0] = float11ToFloat32(R);
- rgb[1] = float11ToFloat32(G);
- rgb[2] = float10ToFloat32(B);
- }
- };
}
#endif // sw_Math_hpp
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp
index ff957a9..6d63b47 100644
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -272,12 +272,12 @@
template<>
void LoadImageRow<R11G11B10F>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)
{
- const sw::R11G11B10FData *sourceRGB = reinterpret_cast<const sw::R11G11B10FData*>(source);
- float *destF = reinterpret_cast<float*>(dest + xoffset * 16);
+ const sw::R11G11B10F *sourceRGB = reinterpret_cast<const sw::R11G11B10F*>(source); // One packed source element is consumed per pixel
+ sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8); // Destination advances 4 halves per pixel, hence the 8-byte stride for xoffset
for(int x = 0; x < width; x++, sourceRGB++, destF+=4)
{
- sourceRGB->toRGBFloats(destF);
+ sourceRGB->toRGB16F(destF); // Fills destF[0..2]; the fourth component is set on the next line
destF[3] = 1.0f;
}
}
@@ -285,12 +285,12 @@
template<>
void LoadImageRow<RGB9E5>(const unsigned char *source, unsigned char *dest, GLint xoffset, GLsizei width)
{
- const sw::RGB9E5Data *sourceRGB = reinterpret_cast<const sw::RGB9E5Data*>(source);
- float *destF = reinterpret_cast<float*>(dest + xoffset * 16);
+ const sw::RGB9E5 *sourceRGB = reinterpret_cast<const sw::RGB9E5*>(source); // One packed source element is consumed per pixel
+ sw::half *destF = reinterpret_cast<sw::half*>(dest + xoffset * 8); // Destination advances 4 halves per pixel, hence the 8-byte stride for xoffset
for(int x = 0; x < width; x++, sourceRGB++, destF += 4)
{
- sourceRGB->toRGBFloats(destF);
+ sourceRGB->toRGB16F(destF); // Fills destF[0..2]; the fourth component is set on the next line
destF[3] = 1.0f;
}
}
@@ -665,9 +665,10 @@
return sw::FORMAT_G32R32F;
case GL_RGB:
case GL_RGB32F:
+ return sw::FORMAT_X32B32G32R32F;
case GL_R11F_G11F_B10F:
case GL_RGB9_E5:
- return sw::FORMAT_X32B32G32R32F;
+ return sw::FORMAT_X16B16G16R16F;
case GL_RGBA:
case GL_RGBA32F:
return sw::FORMAT_A32B32G32R32F;
@@ -676,6 +677,7 @@
case GL_RG16F:
return sw::FORMAT_G16R16F;
case GL_RGB16F:
+ return sw::FORMAT_X16B16G16R16F;
case GL_RGBA16F:
return sw::FORMAT_A16B16G16R16F;
case GL_DEPTH_COMPONENT:
@@ -703,14 +705,14 @@
case GL_RG:
case GL_RG16F:
return sw::FORMAT_G16R16F;
- case GL_RGB:
- case GL_RGB16F:
case GL_RGBA:
case GL_RGBA16F:
return sw::FORMAT_A16B16G16R16F;
+ case GL_RGB:
+ case GL_RGB16F:
case GL_R11F_G11F_B10F:
case GL_RGB9_E5:
- return sw::FORMAT_X32B32G32R32F;
+ return sw::FORMAT_X16B16G16R16F;
default:
UNREACHABLE(format);
}
@@ -914,7 +916,7 @@
}
case GL_UNSIGNED_INT_10F_11F_11F_REV:
case GL_UNSIGNED_INT_5_9_9_9_REV: // 5 is the exponent field, not alpha.
- return sw::FORMAT_X32B32G32R32F;
+ return sw::FORMAT_X16B16G16R16F;
default:
UNREACHABLE(type);
}
diff --git a/src/OpenGL/libGLESv2/Context.cpp b/src/OpenGL/libGLESv2/Context.cpp
index 76058d3..ea52c9f 100644
--- a/src/OpenGL/libGLESv2/Context.cpp
+++ b/src/OpenGL/libGLESv2/Context.cpp
@@ -3314,7 +3314,7 @@
egl::Image *renderTarget = nullptr;
switch(format)
{
- case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT: // GL_NV_read_depth
renderTarget = framebuffer->getDepthBuffer();
break;
default:
diff --git a/src/Renderer/Blitter.hpp b/src/Renderer/Blitter.hpp
index a132a7e..c6eb726 100644
--- a/src/Renderer/Blitter.hpp
+++ b/src/Renderer/Blitter.hpp
@@ -27,7 +27,7 @@
{
struct Options
{
- Options() {}
+ Options() = default;
Options(bool filter, bool useStencil, bool convertSRGB)
: writeMask(0xF), clearOperation(false), filter(filter), useStencil(useStencil), convertSRGB(convertSRGB) {}
Options(unsigned int writeMask)
@@ -54,7 +54,7 @@
struct State : Options
{
- State() {}
+ State() = default;
State(const Options &options) : Options(options) {}
bool operator==(const State &state) const
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index dbde7e3..aa2038e 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -281,7 +281,7 @@
setupPrimitives = &Renderer::setupPoints;
}
- DrawCall *draw = 0;
+ DrawCall *draw = nullptr;
do
{
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 03d37f2..4a4d543 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -304,6 +304,8 @@
((half*)element)[0] = (half)r;
((half*)element)[1] = (half)g;
break;
+ case FORMAT_X16B16G16R16F:
+ ((half*)element)[3] = 1.0f;
case FORMAT_B16G16R16F:
((half*)element)[0] = (half)r;
((half*)element)[1] = (half)g;
@@ -962,6 +964,7 @@
r = ((half*)element)[0];
g = ((half*)element)[1];
break;
+ case FORMAT_X16B16G16R16F:
case FORMAT_B16G16R16F:
r = ((half*)element)[0];
g = ((half*)element)[1];
@@ -1654,6 +1657,7 @@
case FORMAT_R16F: return 2;
case FORMAT_G16R16F: return 4;
case FORMAT_B16G16R16F: return 6;
+ case FORMAT_X16B16G16R16F: return 8;
case FORMAT_A16B16G16R16F: return 8;
case FORMAT_A32F: return 4;
case FORMAT_R32F: return 4;
@@ -2891,6 +2895,7 @@
case FORMAT_R16F:
case FORMAT_G16R16F:
case FORMAT_B16G16R16F:
+ case FORMAT_X16B16G16R16F:
case FORMAT_A16B16G16R16F:
case FORMAT_R32F:
case FORMAT_G32R32F:
@@ -3947,6 +3952,7 @@
case FORMAT_R16F: return FORMAT_R32F;
case FORMAT_G16R16F: return FORMAT_G32R32F;
case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
+ case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F;
case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
case FORMAT_A32F: return FORMAT_A32B32G32R32F;
case FORMAT_R32F: return FORMAT_R32F;
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp
index 2ded418..7075434 100644
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -170,6 +170,7 @@
FORMAT_R16F,
FORMAT_G16R16F,
FORMAT_B16G16R16F,
+ FORMAT_X16B16G16R16F,
FORMAT_A16B16G16R16F,
FORMAT_A32F,
FORMAT_R32F,