// src/System/Half.hpp - SwiftShader - Git at Google

 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #ifndef sw_Half_hpp
 #define sw_Half_hpp

 #include <algorithm>
 #include <cmath>
 #include <cstring>

 namespace sw
 {
 	// Half-precision floating-point value held as raw bits in an unsigned short.
 	// The float conversions and assignment operators are only declared here;
 	// their definitions live outside this header section.
 	class half
 	{
 	public:
 		// Deliberately defaulted: fp16i is left uninitialized, and the default
 		// constructor stays trivial so that half can be a member of the union
 		// used by shortAsHalf().
 		half() = default;
 		explicit half(float f);

 		operator float() const;

 		half &operator=(half h);
 		half &operator=(float f);

 	private:
 		unsigned short fp16i;   // Raw encoded bits
 	};

 	// Reinterprets the bits of 's' as a half, without any numeric conversion.
 	inline half shortAsHalf(short s)
 	{
 		union
 		{
 			half h;
 			short s;
 		} hs;

 		hs.s = s;

 		return hs.h;
 	}

 	// Shared-exponent color: three 9-bit mantissas (R, G, B) plus one 5-bit
 	// exponent (E) common to all components, packed into one unsigned int.
 	class RGB9E5
 	{
 		unsigned int R : 9;
 		unsigned int G : 9;
 		unsigned int B : 9;
 		unsigned int E : 5;

 		// Clamps a component to [0, maxValue]. The comparison fails for NaN, so
 		// NaN maps to 0, as do negative values and -INF.
 		static float clampComponent(float c, float maxValue)
 		{
 			return (c > 0.0f) ? std::min(c, maxValue) : 0.0f;
 		}

 	public:
 		// Encodes the three floats rgb[0..2] (red, green, blue).
 		RGB9E5(const float rgb[3])
 		{
 			// B is the exponent bias (15)
 			constexpr int g_sharedexp_bias = 15;

 			// N is the number of mantissa bits per component (9)
 			constexpr int g_sharedexp_mantissabits = 9;

 			// Emax is the maximum allowed biased exponent value (31)
 			constexpr int g_sharedexp_maxexponent = 31;

 			// Largest representable component value: (2^N - 1) / 2^N * 2^(Emax - B)
 			constexpr float g_sharedexp_max =
 				((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
 					static_cast<float>(1 << g_sharedexp_mantissabits)) *
 				static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));

 			const float red_c = clampComponent(rgb[0], g_sharedexp_max);
 			const float green_c = clampComponent(rgb[1], g_sharedexp_max);
 			const float blue_c = clampComponent(rgb[2], g_sharedexp_max);

 			// The largest component selects the preliminary shared exponent. If
 			// rounding its mantissa overflows 9 bits, the exponent is bumped by one.
 			const float max_c = std::max<float>(std::max<float>(red_c, green_c), blue_c);
 			const float exp_p = std::max<float>(-g_sharedexp_bias - 1, floor(log2(max_c))) + 1 + g_sharedexp_bias;
 			const int max_s = static_cast<int>(floor((max_c / exp2(exp_p - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5f));
 			const int exp_s = static_cast<int>((max_s < exp2(g_sharedexp_mantissabits)) ? exp_p : exp_p + 1);

 			// Value of one mantissa step at the final shared exponent.
 			const auto scale = exp2(exp_s - g_sharedexp_bias - g_sharedexp_mantissabits);

 			R = static_cast<unsigned int>(floor((red_c / scale) + 0.5f));
 			G = static_cast<unsigned int>(floor((green_c / scale) + 0.5f));
 			B = static_cast<unsigned int>(floor((blue_c / scale) + 0.5f));
 			E = exp_s;
 		}

 		// Returns the object representation (the packed bit-fields) as an unsigned int.
 		operator unsigned int() const
 		{
 			static_assert(sizeof(RGB9E5) == sizeof(unsigned int), "RGB9E5 must occupy exactly one unsigned int");

 			// memcpy instead of a pointer cast: same bytes, no aliasing violation.
 			unsigned int packed;
 			std::memcpy(&packed, this, sizeof(packed));

 			return packed;
 		}

 		// Decodes the three components into rgb[0..2] as half values.
 		void toRGB16F(half rgb[3]) const
 		{
 			constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24

 			// factor = 2^(E - 24)
 			const float factor = (1u << E) * (1.0f / (1 << offset));
 			rgb[0] = half(R * factor);
 			rgb[1] = half(G * factor);
 			rgb[2] = half(B * factor);
 		}
 	};

 	// Packed unsigned floating-point color: 11-bit red, 11-bit green and 10-bit
 	// blue, stored as bit-fields in one unsigned int.
 	class R11G11B10F
 	{
 		unsigned int R : 11;
 		unsigned int G : 11;
 		unsigned int B : 10;

 		// Shifts the 11-bit pattern left by 4 and reinterprets it as a half.
 		static inline half float11ToFloat16(unsigned short fp11)
 		{
 			return shortAsHalf(fp11 << 4);   // Sign bit 0
 		}

 		// Shifts the 10-bit pattern left by 5 and reinterprets it as a half.
 		static inline half float10ToFloat16(unsigned short fp10)
 		{
 			return shortAsHalf(fp10 << 5);   // Sign bit 0
 		}

 		// Returns the bit pattern of a float without violating aliasing rules.
 		static unsigned int floatAsBits(float fp32)
 		{
 			static_assert(sizeof(unsigned int) == sizeof(float), "float and unsigned int must have the same size");

 			unsigned int bits;
 			std::memcpy(&bits, &fp32, sizeof(bits));

 			return bits;
 		}

 		// Converts a 32-bit float to an unsigned 11-bit float (5-bit exponent,
 		// 6-bit mantissa). Negative values and -INF become 0, values above the
 		// largest finite float11 become that maximum, NaN stays NaN.
 		static unsigned short float32ToFloat11(float fp32)
 		{
 			const unsigned int float32MantissaMask = 0x7FFFFF;
 			const unsigned int float32ExponentMask = 0x7F800000;
 			const unsigned int float32SignMask = 0x80000000;
 			const unsigned int float32ValueMask = ~float32SignMask;
 			const unsigned int float32ExponentFirstBit = 23;
 			const unsigned int float32ExponentBias = 127;

 			const unsigned short float11Max = 0x7BF;
 			const unsigned short float11MantissaMask = 0x3F;
 			const unsigned short float11ExponentMask = 0x7C0;
 			const unsigned short float11BitMask = 0x7FF;
 			const unsigned int float11ExponentBias = 14;

 			const unsigned int float32Maxfloat11 = 0x477E0000;
 			const unsigned int float32MinNormfloat11 = 0x38800000;
 			// Smallest float32 bit pattern that still rounds to a nonzero float11
 			// denormal. Everything below it yields 0.
 			const unsigned int float32MinDenormfloat11 = 0x35000080;

 			const unsigned int float32Bits = floatAsBits(fp32);
 			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

 			unsigned int float32Val = float32Bits & float32ValueMask;

 			if((float32Val & float32ExponentMask) == float32ExponentMask)
 			{
 				// INF or NAN
 				if((float32Val & float32MantissaMask) != 0)
 				{
 					// Fold all 23 mantissa bits into 6 so the result keeps a
 					// nonzero mantissa and remains a NaN.
 					return float11ExponentMask |
 						(((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
 							float11MantissaMask);
 				}
 				else if(float32Sign)
 				{
 					// -INF is clamped to 0 since float11 is positive only
 					return 0;
 				}
 				else
 				{
 					return float11ExponentMask;
 				}
 			}
 			else if(float32Sign)
 			{
 				// float11 is positive only, so clamp to zero
 				return 0;
 			}
 			else if(float32Val > float32Maxfloat11)
 			{
 				// The number is too large to be represented as a float11, set to max
 				return float11Max;
 			}
 			else if(float32Val < float32MinDenormfloat11)
 			{
 				// Too small even for a denormalized float11. Returning here also
 				// keeps the shift below in range: for these inputs it would be 8
 				// or more, and 32 or more (undefined) for the smallest ones.
 				return 0;
 			}
 			else
 			{
 				if(float32Val < float32MinNormfloat11)
 				{
 					// The number is too small to be represented as a normalized float11
 					// Convert it to a denormalized value.
 					const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
 						(float32Val >> float32ExponentFirstBit);
 					float32Val =
 						((1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
 				}
 				else
 				{
 					// Rebias the exponent to represent the value as a normalized float11
 					float32Val += 0xC8000000;
 				}

 				// Drop the 17 low mantissa bits, rounding to nearest even.
 				return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
 			}
 		}

 		// Converts a 32-bit float to an unsigned 10-bit float (5-bit exponent,
 		// 5-bit mantissa). Negative values and -INF become 0, values above the
 		// largest finite float10 become that maximum, NaN stays NaN.
 		static unsigned short float32ToFloat10(float fp32)
 		{
 			const unsigned int float32MantissaMask = 0x7FFFFF;
 			const unsigned int float32ExponentMask = 0x7F800000;
 			const unsigned int float32SignMask = 0x80000000;
 			const unsigned int float32ValueMask = ~float32SignMask;
 			const unsigned int float32ExponentFirstBit = 23;
 			const unsigned int float32ExponentBias = 127;

 			const unsigned short float10Max = 0x3DF;
 			const unsigned short float10MantissaMask = 0x1F;
 			const unsigned short float10ExponentMask = 0x3E0;
 			const unsigned short float10BitMask = 0x3FF;
 			const unsigned int float10ExponentBias = 14;

 			const unsigned int float32Maxfloat10 = 0x477C0000;
 			const unsigned int float32MinNormfloat10 = 0x38800000;
 			// Smallest float32 bit pattern that still rounds to a nonzero float10
 			// denormal. Everything below it yields 0.
 			const unsigned int float32MinDenormfloat10 = 0x35800040;

 			const unsigned int float32Bits = floatAsBits(fp32);
 			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

 			unsigned int float32Val = float32Bits & float32ValueMask;

 			if((float32Val & float32ExponentMask) == float32ExponentMask)
 			{
 				// INF or NAN
 				if((float32Val & float32MantissaMask) != 0)
 				{
 					// Fold all 23 mantissa bits into 5. The shifts 18/13/8/3/0 cover
 					// every bit, so a NaN can never collapse into INF.
 					return float10ExponentMask |
 						(((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 8) | (float32Val >> 3) | (float32Val)) &
 							float10MantissaMask);
 				}
 				else if(float32Sign)
 				{
 					// -INF is clamped to 0 since float10 is positive only
 					return 0;
 				}
 				else
 				{
 					return float10ExponentMask;
 				}
 			}
 			else if(float32Sign)
 			{
 				// float10 is positive only, so clamp to zero
 				return 0;
 			}
 			else if(float32Val > float32Maxfloat10)
 			{
 				// The number is too large to be represented as a float10, set to max
 				return float10Max;
 			}
 			else if(float32Val < float32MinDenormfloat10)
 			{
 				// Too small even for a denormalized float10. Returning here also
 				// keeps the shift below in range: for these inputs it would be 7
 				// or more, and 32 or more (undefined) for the smallest ones.
 				return 0;
 			}
 			else
 			{
 				if(float32Val < float32MinNormfloat10)
 				{
 					// The number is too small to be represented as a normalized float10
 					// Convert it to a denormalized value.
 					const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
 						(float32Val >> float32ExponentFirstBit);
 					float32Val =
 						((1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
 				}
 				else
 				{
 					// Rebias the exponent to represent the value as a normalized float10
 					float32Val += 0xC8000000;
 				}

 				// Drop the 18 low mantissa bits, rounding to nearest even.
 				return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
 			}
 		}

 	public:
 		// Encodes the three floats rgb[0..2] (red, green, blue).
 		R11G11B10F(const float rgb[3])
 		{
 			R = float32ToFloat11(rgb[0]);
 			G = float32ToFloat11(rgb[1]);
 			B = float32ToFloat10(rgb[2]);
 		}

 		// Returns the object representation (the packed bit-fields) as an unsigned int.
 		operator unsigned int() const
 		{
 			static_assert(sizeof(R11G11B10F) == sizeof(unsigned int), "R11G11B10F must occupy exactly one unsigned int");

 			// memcpy instead of a pointer cast: same bytes, no aliasing violation.
 			unsigned int packed;
 			std::memcpy(&packed, this, sizeof(packed));

 			return packed;
 		}

 		// Decodes the three components into rgb[0..2] as half values.
 		void toRGB16F(half rgb[3]) const
 		{
 			rgb[0] = float11ToFloat16(R);
 			rgb[1] = float11ToFloat16(G);
 			rgb[2] = float10ToFloat16(B);
 		}
 	};
 }

 #endif   // sw_Half_hpp
	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#ifndef sw_Half_hpp
	#define sw_Half_hpp

	#include <algorithm>
	#include <cmath>

	namespace sw
	{
		// Half-precision floating-point value held as raw bits in an unsigned short.
		// The float conversions and assignment operators are only declared here;
		// their definitions live outside this header section.
		class half
		{
		public:
			// Deliberately defaulted: fp16i is left uninitialized, and the default
			// constructor stays trivial so that half can be a member of the union
			// used by shortAsHalf().
			half() = default;
			explicit half(float f);

			operator float() const;

			half &operator=(half h);
			half &operator=(float f);

		private:
			unsigned short fp16i;   // Raw encoded bits
		};

		// Reinterprets the bits of 's' as a half, without any numeric conversion.
		inline half shortAsHalf(short s)
		{
			union
			{
				half h;
				short s;
			} hs;

			hs.s = s;

			return hs.h;
		}

		// Shared-exponent color: three 9-bit mantissas (R, G, B) plus one 5-bit
		// exponent (E) common to all components, packed into one unsigned int.
		class RGB9E5
		{
			unsigned int R : 9;
			unsigned int G : 9;
			unsigned int B : 9;
			unsigned int E : 5;

			// Clamps a component to [0, maxValue]. The comparison fails for NaN, so
			// NaN maps to 0, as do negative values and -INF.
			static float clampComponent(float c, float maxValue)
			{
				return (c > 0.0f) ? std::min(c, maxValue) : 0.0f;
			}

		public:
			// Encodes the three floats rgb[0..2] (red, green, blue).
			RGB9E5(const float rgb[3])
			{
				// B is the exponent bias (15)
				constexpr int g_sharedexp_bias = 15;

				// N is the number of mantissa bits per component (9)
				constexpr int g_sharedexp_mantissabits = 9;

				// Emax is the maximum allowed biased exponent value (31)
				constexpr int g_sharedexp_maxexponent = 31;

				// Largest representable component value: (2^N - 1) / 2^N * 2^(Emax - B)
				constexpr float g_sharedexp_max =
					((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
						static_cast<float>(1 << g_sharedexp_mantissabits)) *
					static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));

				const float red_c = clampComponent(rgb[0], g_sharedexp_max);
				const float green_c = clampComponent(rgb[1], g_sharedexp_max);
				const float blue_c = clampComponent(rgb[2], g_sharedexp_max);

				// The largest component selects the preliminary shared exponent. If
				// rounding its mantissa overflows 9 bits, the exponent is bumped by one.
				const float max_c = std::max<float>(std::max<float>(red_c, green_c), blue_c);
				const float exp_p = std::max<float>(-g_sharedexp_bias - 1, floor(log2(max_c))) + 1 + g_sharedexp_bias;
				const int max_s = static_cast<int>(floor((max_c / exp2(exp_p - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5f));
				const int exp_s = static_cast<int>((max_s < exp2(g_sharedexp_mantissabits)) ? exp_p : exp_p + 1);

				// Value of one mantissa step at the final shared exponent.
				const auto scale = exp2(exp_s - g_sharedexp_bias - g_sharedexp_mantissabits);

				R = static_cast<unsigned int>(floor((red_c / scale) + 0.5f));
				G = static_cast<unsigned int>(floor((green_c / scale) + 0.5f));
				B = static_cast<unsigned int>(floor((blue_c / scale) + 0.5f));
				E = exp_s;
			}

			// Returns the object representation (the packed bit-fields) as an unsigned int.
			operator unsigned int() const
			{
				static_assert(sizeof(RGB9E5) == sizeof(unsigned int), "RGB9E5 must occupy exactly one unsigned int");

				// memcpy instead of a pointer cast: same bytes, no aliasing violation.
				unsigned int packed;
				std::memcpy(&packed, this, sizeof(packed));

				return packed;
			}

			// Decodes the three components into rgb[0..2] as half values.
			void toRGB16F(half rgb[3]) const
			{
				constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24

				// factor = 2^(E - 24)
				const float factor = (1u << E) * (1.0f / (1 << offset));
				rgb[0] = half(R * factor);
				rgb[1] = half(G * factor);
				rgb[2] = half(B * factor);
			}
		};

		// Packed unsigned floating-point color: 11-bit red, 11-bit green and 10-bit
		// blue, stored as bit-fields in one unsigned int.
		class R11G11B10F
		{
			unsigned int R : 11;
			unsigned int G : 11;
			unsigned int B : 10;

			// Shifts the 11-bit pattern left by 4 and reinterprets it as a half.
			static inline half float11ToFloat16(unsigned short fp11)
			{
				return shortAsHalf(fp11 << 4);   // Sign bit 0
			}

			// Shifts the 10-bit pattern left by 5 and reinterprets it as a half.
			static inline half float10ToFloat16(unsigned short fp10)
			{
				return shortAsHalf(fp10 << 5);   // Sign bit 0
			}

			// Returns the bit pattern of a float without violating aliasing rules.
			static unsigned int floatAsBits(float fp32)
			{
				static_assert(sizeof(unsigned int) == sizeof(float), "float and unsigned int must have the same size");

				unsigned int bits;
				std::memcpy(&bits, &fp32, sizeof(bits));

				return bits;
			}

			// Converts a 32-bit float to an unsigned 11-bit float (5-bit exponent,
			// 6-bit mantissa). Negative values and -INF become 0, values above the
			// largest finite float11 become that maximum, NaN stays NaN.
			static unsigned short float32ToFloat11(float fp32)
			{
				const unsigned int float32MantissaMask = 0x7FFFFF;
				const unsigned int float32ExponentMask = 0x7F800000;
				const unsigned int float32SignMask = 0x80000000;
				const unsigned int float32ValueMask = ~float32SignMask;
				const unsigned int float32ExponentFirstBit = 23;
				const unsigned int float32ExponentBias = 127;

				const unsigned short float11Max = 0x7BF;
				const unsigned short float11MantissaMask = 0x3F;
				const unsigned short float11ExponentMask = 0x7C0;
				const unsigned short float11BitMask = 0x7FF;
				const unsigned int float11ExponentBias = 14;

				const unsigned int float32Maxfloat11 = 0x477E0000;
				const unsigned int float32MinNormfloat11 = 0x38800000;
				// Smallest float32 bit pattern that still rounds to a nonzero float11
				// denormal. Everything below it yields 0.
				const unsigned int float32MinDenormfloat11 = 0x35000080;

				const unsigned int float32Bits = floatAsBits(fp32);
				const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

				unsigned int float32Val = float32Bits & float32ValueMask;

				if((float32Val & float32ExponentMask) == float32ExponentMask)
				{
					// INF or NAN
					if((float32Val & float32MantissaMask) != 0)
					{
						// Fold all 23 mantissa bits into 6 so the result keeps a
						// nonzero mantissa and remains a NaN.
						return float11ExponentMask |
							(((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
								float11MantissaMask);
					}
					else if(float32Sign)
					{
						// -INF is clamped to 0 since float11 is positive only
						return 0;
					}
					else
					{
						return float11ExponentMask;
					}
				}
				else if(float32Sign)
				{
					// float11 is positive only, so clamp to zero
					return 0;
				}
				else if(float32Val > float32Maxfloat11)
				{
					// The number is too large to be represented as a float11, set to max
					return float11Max;
				}
				else if(float32Val < float32MinDenormfloat11)
				{
					// Too small even for a denormalized float11. Returning here also
					// keeps the shift below in range: for these inputs it would be 8
					// or more, and 32 or more (undefined) for the smallest ones.
					return 0;
				}
				else
				{
					if(float32Val < float32MinNormfloat11)
					{
						// The number is too small to be represented as a normalized float11
						// Convert it to a denormalized value.
						const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
							(float32Val >> float32ExponentFirstBit);
						float32Val =
							((1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
					}
					else
					{
						// Rebias the exponent to represent the value as a normalized float11
						float32Val += 0xC8000000;
					}

					// Drop the 17 low mantissa bits, rounding to nearest even.
					return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
				}
			}

			// Converts a 32-bit float to an unsigned 10-bit float (5-bit exponent,
			// 5-bit mantissa). Negative values and -INF become 0, values above the
			// largest finite float10 become that maximum, NaN stays NaN.
			static unsigned short float32ToFloat10(float fp32)
			{
				const unsigned int float32MantissaMask = 0x7FFFFF;
				const unsigned int float32ExponentMask = 0x7F800000;
				const unsigned int float32SignMask = 0x80000000;
				const unsigned int float32ValueMask = ~float32SignMask;
				const unsigned int float32ExponentFirstBit = 23;
				const unsigned int float32ExponentBias = 127;

				const unsigned short float10Max = 0x3DF;
				const unsigned short float10MantissaMask = 0x1F;
				const unsigned short float10ExponentMask = 0x3E0;
				const unsigned short float10BitMask = 0x3FF;
				const unsigned int float10ExponentBias = 14;

				const unsigned int float32Maxfloat10 = 0x477C0000;
				const unsigned int float32MinNormfloat10 = 0x38800000;
				// Smallest float32 bit pattern that still rounds to a nonzero float10
				// denormal. Everything below it yields 0.
				const unsigned int float32MinDenormfloat10 = 0x35800040;

				const unsigned int float32Bits = floatAsBits(fp32);
				const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

				unsigned int float32Val = float32Bits & float32ValueMask;

				if((float32Val & float32ExponentMask) == float32ExponentMask)
				{
					// INF or NAN
					if((float32Val & float32MantissaMask) != 0)
					{
						// Fold all 23 mantissa bits into 5. The shifts 18/13/8/3/0 cover
						// every bit, so a NaN can never collapse into INF.
						return float10ExponentMask |
							(((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 8) | (float32Val >> 3) | (float32Val)) &
								float10MantissaMask);
					}
					else if(float32Sign)
					{
						// -INF is clamped to 0 since float10 is positive only
						return 0;
					}
					else
					{
						return float10ExponentMask;
					}
				}
				else if(float32Sign)
				{
					// float10 is positive only, so clamp to zero
					return 0;
				}
				else if(float32Val > float32Maxfloat10)
				{
					// The number is too large to be represented as a float10, set to max
					return float10Max;
				}
				else if(float32Val < float32MinDenormfloat10)
				{
					// Too small even for a denormalized float10. Returning here also
					// keeps the shift below in range: for these inputs it would be 7
					// or more, and 32 or more (undefined) for the smallest ones.
					return 0;
				}
				else
				{
					if(float32Val < float32MinNormfloat10)
					{
						// The number is too small to be represented as a normalized float10
						// Convert it to a denormalized value.
						const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
							(float32Val >> float32ExponentFirstBit);
						float32Val =
							((1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
					}
					else
					{
						// Rebias the exponent to represent the value as a normalized float10
						float32Val += 0xC8000000;
					}

					// Drop the 18 low mantissa bits, rounding to nearest even.
					return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
				}
			}

		public:
			// Encodes the three floats rgb[0..2] (red, green, blue).
			R11G11B10F(const float rgb[3])
			{
				R = float32ToFloat11(rgb[0]);
				G = float32ToFloat11(rgb[1]);
				B = float32ToFloat10(rgb[2]);
			}

			// Returns the object representation (the packed bit-fields) as an unsigned int.
			operator unsigned int() const
			{
				static_assert(sizeof(R11G11B10F) == sizeof(unsigned int), "R11G11B10F must occupy exactly one unsigned int");

				// memcpy instead of a pointer cast: same bytes, no aliasing violation.
				unsigned int packed;
				std::memcpy(&packed, this, sizeof(packed));

				return packed;
			}

			// Decodes the three components into rgb[0..2] as half values.
			void toRGB16F(half rgb[3]) const
			{
				rgb[0] = float11ToFloat16(R);
				rgb[1] = float11ToFloat16(G);
				rgb[2] = float10ToFloat16(B);
			}
		};
	}

	#endif // sw_Half_hpp