| // SPDX-License-Identifier: Apache-2.0 |
| // ---------------------------------------------------------------------------- |
| // Copyright 2011-2020 Arm Limited |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| // use this file except in compliance with the License. You may obtain a copy |
| // of the License at: |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| // License for the specific language governing permissions and limitations |
| // under the License. |
| // ---------------------------------------------------------------------------- |
| |
| /* |
| * This module implements a variety of mathematical data types and library |
| * functions used by the codec. |
| */ |
| |
| #ifndef ASTC_MATHLIB_H_INCLUDED |
| #define ASTC_MATHLIB_H_INCLUDED |
| |
| #include <cmath> |
| #include <cstdint> |
| |
// Fallback definition of pi, used only when the headers included above have
// not already provided an M_PI macro.
#if !defined(M_PI)
    #define M_PI 3.14159265358979323846
#endif
| |
| /* ============================================================================ |
| Fast math library; note that many of the higher-order functions in this set |
| use approximations which are less accurate, but faster, than <cmath> standard |
| library equivalents. |
| |
| Note: Many of these are not necessarily faster than simple C versions when |
| used on a single scalar value, but are included for testing purposes as most |
| have an option based on SSE intrinsics and therefore provide an obvious route |
| to future vectorization. |
| ============================================================================ */ |
| |
// We support scalar versions of many maths functions which use SSE intrinsics
// as an "optimized" path, using just one lane from the SIMD hardware. In
// reality these are often slower than standard C due to setup and scheduling
// overheads, and the fact that we're not offsetting that cost with any actual
// vectorization.
//
// These variants are only included as a means to test that the accuracy of an
// SSE implementation would be acceptable before refactoring code paths to use
// an actual vectorized implementation which gets some advantage from SSE. It
// is therefore expected that the code will go *slower* with this macro
// set to 1 ...
//
// The macro defaults to 0. It is guarded, like the other configuration macros
// in this header, so a build can supply its own value (for example
// -DUSE_SCALAR_SSE=1) without triggering a macro redefinition.
#ifndef USE_SCALAR_SSE
#define USE_SCALAR_SSE 0
#endif
| |
// Everything below lives in its own namespace so that the names cannot clash
// with the identically named C standard library functions.
namespace astc
{

/**
 * @brief Check whether a float value is a NaN.
 *
 * A NaN is the only float value that does not compare equal to itself, which
 * is the property this test relies on.
 *
 * @param val The value to test.
 *
 * @return Zero if @c val is not a NaN, non-zero if it is.
 */
static inline int isnan(float val)
{
    return !(val == val);
}

/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers to
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values ...
 *
 * @param state The two-word state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The two-word state structure to use and update.
 *
 * @return The next value in the generator's sequence.
 */
uint64_t rand(uint64_t state[2]);

}
| |
| /* ============================================================================ |
| Utility vector template classes with basic operations |
| ============================================================================ */ |
| |
/**
 * @brief Minimal four-component vector of an arbitrary element type.
 *
 * The components are public and named x, y, z and w. Copy construction and
 * copy assignment are deliberately left to the compiler: the implicit
 * versions perform the same member-wise copy as hand-written ones would,
 * while keeping the type trivially copyable for trivially copyable @c T.
 *
 * @tparam T The type of each of the four components.
 */
template <typename T> class vtype4
{
public:
    T x, y, z, w;

    /**
     * @brief Construct without initializing the components.
     *
     * The members are default-initialized, so for built-in element types
     * their values are indeterminate until assigned. This constructor stays
     * user-provided so that existing declarations keep their meaning.
     */
    vtype4() {}

    /**
     * @brief Construct from four component values.
     *
     * @param p The value for x.
     * @param q The value for y.
     * @param r The value for z.
     * @param s The value for w.
     */
    vtype4(T p, T q, T r, T s) : x(p), y(q), z(r), w(s) {}
};
| |
| typedef vtype4<int> int4; |
| typedef vtype4<unsigned int> uint4; |
| |
| static inline int4 operator+(int4 p, int4 q) { return int4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); } |
| static inline uint4 operator+(uint4 p, uint4 q) { return uint4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); } |
| |
| static inline int4 operator-(int4 p, int4 q) { return int4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); } |
| static inline uint4 operator-(uint4 p, uint4 q) { return uint4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); } |
| |
| static inline int4 operator*(int4 p, int4 q) { return int4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); } |
| static inline uint4 operator*(uint4 p, uint4 q) { return uint4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); } |
| |
| static inline int4 operator*(int4 p, int q) { return int4( p.x * q, p.y * q, p.z * q, p.w * q ); } |
| static inline uint4 operator*(uint4 p, uint32_t q) { return uint4( p.x * q, p.y * q, p.z * q, p.w * q ); } |
| |
| static inline int4 operator*(int p, int4 q) { return q * p; } |
| static inline uint4 operator*(uint32_t p, uint4 q) { return q * p; } |
| |
// Smaller / larger of two values. Each macro is only defined when no macro of
// that name exists already. Note that the selected argument is expanded twice,
// so arguments with side effects must not be passed.
#if !defined(MIN)
    #define MIN(a, b) ((a)<(b)?(a):(b))
#endif

#if !defined(MAX)
    #define MAX(a, b) ((a)>(b)?(a):(b))
#endif
| |
| /* ============================================================================ |
| Softfloat library with fp32 and fp16 conversion functionality. |
| ============================================================================ */ |
/**
 * @brief A 32-bit cell whose storage is shared between an unsigned integer,
 * a signed integer and a float member.
 */
union if32_
{
    uint32_t u;
    int32_t s;
    float f;
};

typedef union if32_ if32;
| |
/**
 * @brief Declared here and defined elsewhere.
 *
 * NOTE(review): going by the name, this presumably counts the leading zero
 * bits of a 32-bit value - confirm against the implementation.
 *
 * @param p The 32-bit input value.
 */
uint32_t clz32(uint32_t p);

/*
 * Sized soft-float types. These are mapped onto the fixed-width integer types
 * rather than onto the language's floating-point types, because the library
 * needs to maintain exact, bit-level control over every operation on them.
 */
using sf16 = uint16_t;
using sf32 = uint32_t;

/* Widening float->float conversions (declared here, defined elsewhere). */

/**
 * @brief Widen a 16-bit soft-float value to a 32-bit soft-float value.
 *
 * @param inp The 16-bit soft-float input.
 */
sf32 sf16_to_sf32(sf16 inp);

/**
 * @brief Convert a 16-bit soft-float value to a native float.
 *
 * @param inp The 16-bit soft-float input.
 */
float sf16_to_float(sf16 inp);
| |
| #endif |