// third_party/astc-encoder/Source/astc_mathlib.h - SwiftShader - Git at Google

 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
 // Copyright 2011-2020 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
 // of the License at:
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 // License for the specific language governing permissions and limitations
 // under the License.
 // ----------------------------------------------------------------------------

 /*
  * This module implements a variety of mathematical data types and library
  * functions used by the codec.
  */

 #ifndef ASTC_MATHLIB_H_INCLUDED
 #define ASTC_MATHLIB_H_INCLUDED

 #include <cmath>
 #include <cstdint>

 #ifndef M_PI
 	#define M_PI 3.14159265358979323846
 #endif

 /* ============================================================================
   Fast math library; note that many of the higher-order functions in this set
   use approximations which are less accurate, but faster, than <cmath> standard
   library equivalents.

   Note: Many of these are not necessarily faster than simple C versions when
   used on a single scalar value, but are included for testing purposes as most
   have an option based on SSE intrinsics and therefore provide an obvious route
   to future vectorization.
 ============================================================================ */

 // We support scalar versions of many maths functions which use SSE intrinsics
 // as an "optimized" path, using just one lane from the SIMD hardware. In
 // reality these are often slower than standard C due to setup and scheduling
 // overheads, and the fact that we're not offsetting that cost with any actual
 // vectorization.
 //
 // These variants are only included as a means to test that the accuracy of an
 // SSE implementation would be acceptable before refactoring code paths to use
 // an actual vectorized implementation which gets some advantage from SSE. It
 // is therefore expected that the code will go *slower* with this macro
 // set to 1 ...
 #define USE_SCALAR_SSE 0

 // These are namespaced to avoid colliding with C standard library functions.
 namespace astc
 {

 /**
  * @brief Check whether a float value is a NaN.
  *
  * The check relies on a NaN being the only float value that does not compare
  * equal to itself.
  *
  * @param val The value to test.
  *
  * @return Zero if @c val is not a NaN, non-zero otherwise.
  */
 static inline int isnan(float val)
 {
 	// Any ordinary value (including infinities) compares equal to itself.
 	if (val == val)
 	{
 		return 0;
 	}

 	return 1;
 }

 /**
  * @brief Initialize the seed structure for a random number generator.
  *
  * Important note: For the purposes of ASTC we want sets of random numbers for
  * use by the codec, but we want the same seed value across instances and
  * threads to ensure that image output is stable across compressor runs and
  * across platforms. Every PRNG initialized by this call will therefore return
  * the same sequence of values.
  *
  * @param state The state structure (two 64-bit words) to initialize.
  */
 void rand_init(uint64_t state[2]);

 /**
  * @brief Return the next random number from the generator.
  *
  * This RNG is an implementation of the "xoroshiro-128+ 1.0" PRNG, based on the
  * public-domain implementation given by David Blackman & Sebastiano Vigna at
  * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
  *
  * @param state The state structure (two 64-bit words) to use/update.
  *
  * @return The next 64-bit random number in the sequence.
  */
 uint64_t rand(uint64_t state[2]);

 }

 /* ============================================================================
   Utility vector template classes with basic operations
 ============================================================================ */

 /**
  * @brief Simple four-component vector holding members x, y, z and w of type T.
  *
  * The default constructor performs no explicit initialization of the
  * components, so for built-in types their values are indeterminate until
  * assigned.
  */
 template <typename T>
 class vtype4
 {
 public:
 	T x, y, z, w;

 	/** @brief Construct a vector without setting any component. */
 	vtype4() {}

 	/** @brief Construct a vector from four component values (x, y, z, w). */
 	vtype4(T p, T q, T r, T s)
 		: x(p), y(q), z(r), w(s)
 	{
 	}

 	/** @brief Construct a component-wise copy of another vector. */
 	vtype4(const vtype4 & p)
 		: x(p.x), y(p.y), z(p.z), w(p.w)
 	{
 	}

 	/** @brief Copy all four components from @c s and return this vector. */
 	vtype4 &operator =(const vtype4 &s)
 	{
 		x = s.x;
 		y = s.y;
 		z = s.z;
 		w = s.w;
 		return *this;
 	}
 };

 // Four-component vectors of signed and unsigned integers.
 typedef vtype4<int>          int4;
 typedef vtype4<unsigned int> uint4;

 // Component-wise addition.
 static inline int4 operator+(int4 p, int4 q)
 {
 	return int4(p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w);
 }

 static inline uint4 operator+(uint4 p, uint4 q)
 {
 	return uint4(p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w);
 }

 // Component-wise subtraction.
 static inline int4 operator-(int4 p, int4 q)
 {
 	return int4(p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w);
 }

 static inline uint4 operator-(uint4 p, uint4 q)
 {
 	return uint4(p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w);
 }

 // Component-wise multiplication of two vectors.
 static inline int4 operator*(int4 p, int4 q)
 {
 	return int4(p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w);
 }

 static inline uint4 operator*(uint4 p, uint4 q)
 {
 	return uint4(p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w);
 }

 // Multiplication of every component by a scalar (vector on the left).
 static inline int4 operator*(int4 p, int q)
 {
 	return int4(p.x * q, p.y * q, p.z * q, p.w * q);
 }

 static inline uint4 operator*(uint4 p, uint32_t q)
 {
 	return uint4(p.x * q, p.y * q, p.z * q, p.w * q);
 }

 // Multiplication of every component by a scalar (scalar on the left).
 static inline int4 operator*(int p, int4 q)
 {
 	return int4(q.x * p, q.y * p, q.z * p, q.w * p);
 }

 static inline uint4 operator*(uint32_t p, uint4 q)
 {
 	return uint4(q.x * p, q.y * p, q.z * p, q.w * p);
 }

 // MIN(x,y) yields the smaller of x and y; y is yielded when x is not less
 // than y. The selected argument is expanded twice, so avoid arguments with
 // side effects (e.g. MIN(i++, j)). Only defined if MIN is not already defined.
 #ifndef MIN
 	#define MIN(x,y) ((x)<(y)?(x):(y))
 #endif

 // MAX(x,y) yields the larger of x and y; y is yielded when x is not greater
 // than y. The selected argument is expanded twice, so avoid arguments with
 // side effects. Only defined if MAX is not already defined.
 #ifndef MAX
 	#define MAX(x,y) ((x)>(y)?(x):(y))
 #endif

 /* ============================================================================
   Softfloat library with fp32 and fp16 conversion functionality.
 ============================================================================ */
 // Union providing unsigned-integer, signed-integer and float views of the
 // same storage.
 typedef union if32_
 {
 	uint32_t u;  // unsigned 32-bit integer view
 	int32_t s;   // signed 32-bit integer view
 	float f;     // floating-point view
 } if32;

 // NOTE(review): presumably counts the leading zero bits of p (inferred from
 // the name only; the definition is not in this header) - confirm, including
 // the result for p == 0.
 uint32_t clz32(uint32_t p);

 /* Sized soft-float types. These are mapped to the sized integer
    types of C99, instead of C's floating-point types; this is because
    the library needs to maintain exact, bit-level control on all
    operations on these data types. */
 typedef uint16_t sf16;  // 16-bit soft-float value stored in an integer
 typedef uint32_t sf32;  // 32-bit soft-float value stored in an integer

 /* widening float->float conversions */

 // Convert a 16-bit soft-float value to a 32-bit soft-float value.
 sf32 sf16_to_sf32(sf16);

 // Convert a 16-bit soft-float value to a native float.
 float sf16_to_float(sf16);

 #endif
	// SPDX-License-Identifier: Apache-2.0
	// ----------------------------------------------------------------------------
	// Copyright 2011-2020 Arm Limited
	//
	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
	// use this file except in compliance with the License. You may obtain a copy
	// of the License at:
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	// License for the specific language governing permissions and limitations
	// under the License.
	// ----------------------------------------------------------------------------

	/*
	* This module implements a variety of mathematical data types and library
	* functions used by the codec.
	*/

	#ifndef ASTC_MATHLIB_H_INCLUDED
	#define ASTC_MATHLIB_H_INCLUDED

	#include <cmath>
	#include <cstdint>

	#ifndef M_PI
	#define M_PI 3.14159265358979323846
	#endif

	/* ============================================================================
	Fast math library; note that many of the higher-order functions in this set
	use approximations which are less accurate, but faster, than <cmath> standard
	library equivalents.

	Note: Many of these are not necessarily faster than simple C versions when
	used on a single scalar value, but are included for testing purposes as most
	have an option based on SSE intrinsics and therefore provide an obvious route
	to future vectorization.
	============================================================================ */

	// We support scalar versions of many maths functions which use SSE intrinsics
	// as an "optimized" path, using just one lane from the SIMD hardware. In
	// reality these are often slower than standard C due to setup and scheduling
	// overheads, and the fact that we're not offsetting that cost with any actual
	// vectorization.
	//
	// These variants are only included as a means to test that the accuracy of an
	// SSE implementation would be acceptable before refactoring code paths to use
	// an actual vectorized implementation which gets some advantage from SSE. It
	// is therefore expected that the code will go slower with this macro
	// set to 1 ...
	#define USE_SCALAR_SSE 0

	// These are namespaced to avoid colliding with C standard library functions.
	namespace astc
	{

	/**
	 * @brief Check whether a float value is a NaN.
	 *
	 * The check relies on a NaN being the only float value that does not compare
	 * equal to itself.
	 *
	 * @param val The value to test.
	 *
	 * @return Zero if @c val is not a NaN, non-zero otherwise.
	 */
	static inline int isnan(float val)
	{
		// Any ordinary value (including infinities) compares equal to itself.
		if (val == val)
		{
			return 0;
		}

		return 1;
	}

	/**
	 * @brief Initialize the seed structure for a random number generator.
	 *
	 * Important note: For the purposes of ASTC we want sets of random numbers for
	 * use by the codec, but we want the same seed value across instances and
	 * threads to ensure that image output is stable across compressor runs and
	 * across platforms. Every PRNG initialized by this call will therefore return
	 * the same sequence of values.
	 *
	 * @param state The state structure (two 64-bit words) to initialize.
	 */
	void rand_init(uint64_t state[2]);

	/**
	 * @brief Return the next random number from the generator.
	 *
	 * This RNG is an implementation of the "xoroshiro-128+ 1.0" PRNG, based on the
	 * public-domain implementation given by David Blackman & Sebastiano Vigna at
	 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
	 *
	 * @param state The state structure (two 64-bit words) to use/update.
	 *
	 * @return The next 64-bit random number in the sequence.
	 */
	uint64_t rand(uint64_t state[2]);

	}

	/* ============================================================================
	Utility vector template classes with basic operations
	============================================================================ */

	/**
	 * @brief Simple four-component vector holding members x, y, z and w of type T.
	 *
	 * The default constructor performs no explicit initialization of the
	 * components, so for built-in types their values are indeterminate until
	 * assigned.
	 */
	template <typename T>
	class vtype4
	{
	public:
		T x, y, z, w;

		/** @brief Construct a vector without setting any component. */
		vtype4() {}

		/** @brief Construct a vector from four component values (x, y, z, w). */
		vtype4(T p, T q, T r, T s)
			: x(p), y(q), z(r), w(s)
		{
		}

		/** @brief Construct a component-wise copy of another vector. */
		vtype4(const vtype4 & p)
			: x(p.x), y(p.y), z(p.z), w(p.w)
		{
		}

		/** @brief Copy all four components from @c s and return this vector. */
		vtype4 &operator =(const vtype4 &s)
		{
			x = s.x;
			y = s.y;
			z = s.z;
			w = s.w;
			return *this;
		}
	};

	// Four-component vectors of signed and unsigned integers.
	typedef vtype4<int> int4;
	typedef vtype4<unsigned int> uint4;

	// Component-wise addition.
	static inline int4 operator+(int4 p, int4 q) { return int4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); }
	static inline uint4 operator+(uint4 p, uint4 q) { return uint4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); }

	// Component-wise subtraction.
	static inline int4 operator-(int4 p, int4 q) { return int4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); }
	static inline uint4 operator-(uint4 p, uint4 q) { return uint4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); }

	// Component-wise multiplication of two vectors. The '*' tokens had been
	// stripped from these overloads, leaving invalid declarations; they are
	// restored here.
	static inline int4 operator*(int4 p, int4 q) { return int4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); }
	static inline uint4 operator*(uint4 p, uint4 q) { return uint4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); }

	// Multiplication of every component by a scalar (vector on the left).
	static inline int4 operator*(int4 p, int q) { return int4( p.x * q, p.y * q, p.z * q, p.w * q ); }
	static inline uint4 operator*(uint4 p, uint32_t q) { return uint4( p.x * q, p.y * q, p.z * q, p.w * q ); }

	// Multiplication of every component by a scalar (scalar on the left);
	// forwards to the vector-on-the-left overloads.
	static inline int4 operator*(int p, int4 q) { return q * p; }
	static inline uint4 operator*(uint32_t p, uint4 q) { return q * p; }

	// MIN(x,y) yields the smaller of x and y; y is yielded when x is not less
	// than y. The selected argument is expanded twice, so avoid arguments with
	// side effects (e.g. MIN(i++, j)). Only defined if MIN is not already defined.
	#ifndef MIN
	#define MIN(x,y) ((x)<(y)?(x):(y))
	#endif

	// MAX(x,y) yields the larger of x and y; y is yielded when x is not greater
	// than y. The selected argument is expanded twice, so avoid arguments with
	// side effects. Only defined if MAX is not already defined.
	#ifndef MAX
	#define MAX(x,y) ((x)>(y)?(x):(y))
	#endif

	/* ============================================================================
	Softfloat library with fp32 and fp16 conversion functionality.
	============================================================================ */
	// Union providing unsigned-integer, signed-integer and float views of the
	// same storage.
	typedef union if32_
	{
	uint32_t u;  // unsigned 32-bit integer view
	int32_t s;   // signed 32-bit integer view
	float f;     // floating-point view
	} if32;

	// NOTE(review): presumably counts the leading zero bits of p (inferred from
	// the name only; the definition is not in this header) - confirm, including
	// the result for p == 0.
	uint32_t clz32(uint32_t p);

	/* Sized soft-float types. These are mapped to the sized integer
	types of C99, instead of C's floating-point types; this is because
	the library needs to maintain exact, bit-level control on all
	operations on these data types. */
	typedef uint16_t sf16;  // 16-bit soft-float value stored in an integer
	typedef uint32_t sf32;  // 32-bit soft-float value stored in an integer

	/* widening float->float conversions */

	// Convert a 16-bit soft-float value to a 32-bit soft-float value.
	sf32 sf16_to_sf32(sf16);

	// Convert a 16-bit soft-float value to a native float.
	float sf16_to_float(sf16);

	#endif