#include <utility>
#include <algorithm>
#include <istream>
#include <ostream>
#include <limits>
#include <stdexcept>
#include <climits>
#include <cmath>
#include <cstring>
#include <cstdlib>
Go to the source code of this file.
Namespaces | |
namespace | half_float |
namespace | std |
Extensions to the C++ standard library. | |
Macros | |
#define | HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__) |
#define | HALF_ICC_VERSION 0 |
#define | HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) |
#define | HALF_UNUSED_NOERR(name) |
#define | HALF_CONSTEXPR |
#define | HALF_CONSTEXPR_CONST const |
#define | HALF_CONSTEXPR_NOERR |
#define | HALF_NOEXCEPT |
#define | HALF_NOTHROW throw() |
#define | HALF_THREAD_LOCAL static |
#define | HALF_ENABLE_F16C_INTRINSICS __F16C__ |
#define | HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 |
#define | HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 |
#define | HALF_ROUND_STYLE 1 |
#define | HUGE_VALH std::numeric_limits<half_float::half>::infinity() |
#define | FP_FAST_FMAH 1 |
#define | HLF_ROUNDS HALF_ROUND_STYLE |
#define | FP_ILOGB0 INT_MIN |
#define | FP_ILOGBNAN INT_MAX |
#define | FP_SUBNORMAL 0 |
#define | FP_ZERO 1 |
#define | FP_NAN 2 |
#define | FP_INFINITE 3 |
#define | FP_NORMAL 4 |
#define | FE_INVALID 0x10 |
#define | FE_DIVBYZERO 0x08 |
#define | FE_OVERFLOW 0x04 |
#define | FE_UNDERFLOW 0x02 |
#define | FE_INEXACT 0x01 |
#define | FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) |
Typedefs | |
typedef bool_type< true > | true_type |
typedef bool_type< false > | false_type |
typedef unsigned short | uint16 |
Unsigned integer of (at least) 16 bits width. More... | |
typedef unsigned long | uint32 |
Fastest unsigned integer of (at least) 32 bits width. More... | |
typedef long | int32 |
Fastest signed integer of (at least) 32 bits width. More... | |
Functions | |
Implementation defined classification and arithmetic | |
template<typename T > | |
bool | builtin_isinf (T arg) |
template<typename T > | |
bool | builtin_isnan (T arg) |
template<typename T > | |
bool | builtin_signbit (T arg) |
uint32 | sign_mask (uint32 arg) |
uint32 | arithmetic_shift (uint32 arg, int i) |
Error handling | |
int & | errflags () |
void | raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true) |
HALF_CONSTEXPR_NOERR bool | compsignal (unsigned int x, unsigned int y) |
HALF_CONSTEXPR_NOERR unsigned int | signal (unsigned int nan) |
HALF_CONSTEXPR_NOERR unsigned int | signal (unsigned int x, unsigned int y) |
HALF_CONSTEXPR_NOERR unsigned int | signal (unsigned int x, unsigned int y, unsigned int z) |
HALF_CONSTEXPR_NOERR unsigned int | select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) |
HALF_CONSTEXPR_NOERR unsigned int | invalid () |
HALF_CONSTEXPR_NOERR unsigned int | pole (unsigned int sign=0) |
HALF_CONSTEXPR_NOERR unsigned int | check_underflow (unsigned int arg) |
Conversion and rounding | |
template<std::float_round_style R> | |
HALF_CONSTEXPR_NOERR unsigned int | overflow (unsigned int sign=0) |
template<std::float_round_style R> | |
HALF_CONSTEXPR_NOERR unsigned int | underflow (unsigned int sign=0) |
template<std::float_round_style R, bool I> | |
HALF_CONSTEXPR_NOERR unsigned int | rounded (unsigned int value, int g, int s) |
template<std::float_round_style R, bool E, bool I> | |
unsigned int | integral (unsigned int value) |
template<std::float_round_style R, unsigned int F, bool S, bool N, bool I> | |
unsigned int | fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0) |
template<std::float_round_style R> | |
unsigned int | float2half_impl (float value, true_type) |
template<std::float_round_style R> | |
unsigned int | float2half_impl (double value, true_type) |
template<std::float_round_style R, typename T > | |
unsigned int | float2half_impl (T value,...) |
template<std::float_round_style R, typename T > | |
unsigned int | float2half (T value) |
template<std::float_round_style R, typename T > | |
unsigned int | int2half (T value) |
float | half2float_impl (unsigned int value, float, true_type) |
double | half2float_impl (unsigned int value, double, true_type) |
template<typename T > | |
T | half2float_impl (unsigned int value, T,...) |
template<typename T > | |
T | half2float (unsigned int value) |
template<std::float_round_style R, bool E, bool I, typename T > | |
T | half2int (unsigned int value) |
Mathematics | |
template<std::float_round_style R> | |
uint32 | mulhi (uint32 x, uint32 y) |
uint32 | multiply64 (uint32 x, uint32 y) |
uint32 | divide64 (uint32 x, uint32 y, int &s) |
template<bool Q, bool R> | |
unsigned int | mod (unsigned int x, unsigned int y, int *quo=NULL) |
template<unsigned int F> | |
uint32 | sqrt (uint32 &r, int &exp) |
uint32 | exp2 (uint32 m, unsigned int n=32) |
uint32 | log2 (uint32 m, unsigned int n=32) |
std::pair< uint32, uint32 > | sincos (uint32 mz, unsigned int n=31) |
uint32 | atan2 (uint32 my, uint32 mx, unsigned int n=31) |
uint32 | angle_arg (unsigned int abs, int &k) |
std::pair< uint32, uint32 > | atan2_args (unsigned int abs) |
std::pair< uint32, uint32 > | hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32) |
template<std::float_round_style R> | |
unsigned int | exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0, unsigned int n=32) |
template<std::float_round_style R, uint32 L> | |
unsigned int | log2_post (uint32 m, int ilog, int exp, unsigned int sign=0) |
template<std::float_round_style R> | |
unsigned int | hypot_post (uint32 r, int exp) |
template<std::float_round_style R> | |
unsigned int | tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0) |
template<std::float_round_style R, bool S> | |
unsigned int | area (unsigned int arg) |
template<std::float_round_style R, bool C> | |
unsigned int | erf (unsigned int arg) |
template<std::float_round_style R, bool L> | |
unsigned int | gamma (unsigned int arg) |
Comparison operators | |
HALF_CONSTEXPR_NOERR bool | operator== (half x, half y) |
HALF_CONSTEXPR_NOERR bool | operator!= (half x, half y) |
HALF_CONSTEXPR_NOERR bool | operator< (half x, half y) |
HALF_CONSTEXPR_NOERR bool | operator> (half x, half y) |
HALF_CONSTEXPR_NOERR bool | operator<= (half x, half y) |
HALF_CONSTEXPR_NOERR bool | operator>= (half x, half y) |
Arithmetic operators | |
HALF_CONSTEXPR half | operator+ (half arg) |
HALF_CONSTEXPR half | operator- (half arg) |
half | operator+ (half x, half y) |
half | operator- (half x, half y) |
half | operator* (half x, half y) |
half | operator/ (half x, half y) |
Input and output | |
template<typename charT , typename traits > | |
std::basic_ostream< charT, traits > & | operator<< (std::basic_ostream< charT, traits > &out, half arg) |
template<typename charT , typename traits > | |
std::basic_istream< charT, traits > & | operator>> (std::basic_istream< charT, traits > &in, half &arg) |
Basic mathematical operations | |
HALF_CONSTEXPR half | fabs (half arg) |
HALF_CONSTEXPR half | abs (half arg) |
half | fmod (half x, half y) |
half | remainder (half x, half y) |
half | remquo (half x, half y, int *quo) |
half | fma (half x, half y, half z) |
HALF_CONSTEXPR_NOERR half | fmax (half x, half y) |
HALF_CONSTEXPR_NOERR half | fmin (half x, half y) |
half | fdim (half x, half y) |
half | nanh (const char *arg) |
Exponential functions | |
half | exp (half arg) |
half | exp2 (half arg) |
half | expm1 (half arg) |
half | log (half arg) |
half | log10 (half arg) |
half | log2 (half arg) |
half | log1p (half arg) |
Power functions | |
half | sqrt (half arg) |
half | rsqrt (half arg) |
half | cbrt (half arg) |
half | hypot (half x, half y) |
half | hypot (half x, half y, half z) |
half | pow (half x, half y) |
Trigonometric functions | |
void | sincos (half arg, half *sin, half *cos) |
half | sin (half arg) |
half | cos (half arg) |
half | tan (half arg) |
half | asin (half arg) |
half | acos (half arg) |
half | atan (half arg) |
half | atan2 (half y, half x) |
Hyperbolic functions | |
half | sinh (half arg) |
half | cosh (half arg) |
half | tanh (half arg) |
half | asinh (half arg) |
half | acosh (half arg) |
half | atanh (half arg) |
Error and gamma functions | |
half | erf (half arg) |
half | erfc (half arg) |
half | lgamma (half arg) |
half | tgamma (half arg) |
Rounding | |
half | ceil (half arg) |
half | floor (half arg) |
half | trunc (half arg) |
half | round (half arg) |
long | lround (half arg) |
half | rint (half arg) |
long | lrint (half arg) |
half | nearbyint (half arg) |
Floating point manipulation | |
half | frexp (half arg, int *exp) |
half | scalbln (half arg, long exp) |
half | scalbn (half arg, int exp) |
half | ldexp (half arg, int exp) |
half | modf (half arg, half *iptr) |
int | ilogb (half arg) |
half | logb (half arg) |
half | nextafter (half from, half to) |
half | nexttoward (half from, long double to) |
HALF_CONSTEXPR half | copysign (half x, half y) |
Floating point classification | |
HALF_CONSTEXPR int | fpclassify (half arg) |
HALF_CONSTEXPR bool | isfinite (half arg) |
HALF_CONSTEXPR bool | isinf (half arg) |
HALF_CONSTEXPR bool | isnan (half arg) |
HALF_CONSTEXPR bool | isnormal (half arg) |
HALF_CONSTEXPR bool | signbit (half arg) |
Comparison | |
HALF_CONSTEXPR bool | isgreater (half x, half y) |
HALF_CONSTEXPR bool | isgreaterequal (half x, half y) |
HALF_CONSTEXPR bool | isless (half x, half y) |
HALF_CONSTEXPR bool | islessequal (half x, half y) |
HALF_CONSTEXPR bool | islessgreater (half x, half y) |
HALF_CONSTEXPR bool | isunordered (half x, half y) |
Casting | |
template<typename T , typename U > | |
T | half_cast (U arg) |
template<typename T , std::float_round_style R, typename U > | |
T | half_cast (U arg) |
Error handling | |
int | feclearexcept (int excepts) |
int | fetestexcept (int excepts) |
int | feraiseexcept (int excepts) |
int | fegetexceptflag (int *flagp, int excepts) |
int | fesetexceptflag (const int *flagp, int excepts) |
void | fethrowexcept (int excepts, const char *msg="") |
Variables | |
HALF_CONSTEXPR_CONST binary_t | binary = binary_t() |
Tag for binary construction. More... | |
Main header file for half-precision functionality.
Definition in file half.hpp.
#define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT) |
#define FP_FAST_FMAH 1 |
Fast half-precision fma function. This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate half-precision multiplication followed by an addition, which is always the case.
See also: Documentation for FP_FAST_FMA
#define HALF_ENABLE_F16C_INTRINSICS __F16C__ |
Enable F16C instruction set intrinsics. Defining this to 1 enables the use of F16C compiler intrinsics for converting between half-precision and single-precision values which may result in improved performance. This will not perform additional checks for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.
Unless predefined it will be enabled automatically when the __F16C__ symbol is defined, which some compilers do on supporting platforms.
#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS) |
#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1 |
#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1 |
Raise INEXACT exception on underflow. Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the underflow exception.
Note: This will actually cause underflow (and the accompanying inexact) exceptions to be raised only when the result is inexact, while if disabled bare underflow errors will be raised for any (possibly exact) subnormal result.
#define HALF_ROUND_STYLE 1 |
Default rounding mode. This specifies the rounding mode used for all conversions between halfs and more precise types (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of std::float_round_style:
std::float_round_style | value | rounding |
---|---|---|
std::round_indeterminate | -1 | fastest |
std::round_toward_zero | 0 | toward zero |
std::round_to_nearest | 1 | to nearest (default) |
std::round_toward_infinity | 2 | toward positive infinity |
std::round_toward_neg_infinity | 3 | toward negative infinity |
By default this is set to 1 (std::round_to_nearest), which rounds results to the nearest representable value. It can even be set to std::numeric_limits<float>::round_style to synchronize the rounding mode with that of the built-in single-precision implementation (which is likely std::round_to_nearest, though).
#define HLF_ROUNDS HALF_ROUND_STYLE |
Half rounding mode. In correspondence with FLT_ROUNDS from <cfloat> this symbol expands to the rounding mode used for half-precision operations. It is an alias for HALF_ROUND_STYLE.
See also: Documentation for FLT_ROUNDS
#define HUGE_VALH std::numeric_limits<half_float::half>::infinity() |
typedef long int32 |
typedef unsigned short uint16 |
typedef unsigned long uint32 |
|
inline |
unsigned int half_float::detail::area | ( | unsigned int | arg | ) |
Area function and postprocessing. This computes the value directly in Q2.30 using the representation asinh|acosh(x) = log(x + sqrt(x^2 ± 1)), with +1 for asinh and -1 for acosh.
R | rounding mode to use |
S | true for asinh, false for acosh |
arg | half-precision argument |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
|
inline |
|
inline |
bool half_float::detail::builtin_isinf | ( | T | arg | ) |
bool half_float::detail::builtin_isnan | ( | T | arg | ) |
bool half_float::detail::builtin_signbit | ( | T | arg | ) |
|
inline |
|
inline |
unsigned int half_float::detail::erf | ( | unsigned int | arg | ) |
Error function and postprocessing. This computes the value directly in Q1.31 using the approximations given here.
R | rounding mode to use |
C | true for complementary error function, false else |
arg | half-precision function argument |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
|
inline |
|
inline |
unsigned int half_float::detail::exp2_post | ( | uint32 | m, |
int | exp, | ||
bool | esign, | ||
unsigned int | sign = 0 , |
||
unsigned int | n = 32 |
||
) |
Postprocessing for binary exponential.
R | rounding mode to use |
m | fractional part as Q0.31 |
exp | absolute value of unbiased exponent |
esign | sign of actual exponent |
sign | sign bit of result |
n | number of BKM iterations (at most 32) |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded or I is true |
unsigned int half_float::detail::fixed2half | ( | uint32 | m, |
int | exp = 14 , |
||
unsigned int | sign = 0 , |
||
int | s = 0 |
||
) |
Convert fixed point to half-precision floating-point.
R | rounding mode to use |
F | number of fractional bits in [11,31] |
S | true for signed, false for unsigned |
N | true for additional normalization step, false if already normalized to type(1) |
I | true to always raise INEXACT exception, false to raise only for rounded results |
m | mantissa in Q1.F fixed point format |
exp | biased exponent - 1 |
sign | half-precision value with sign bit only |
s | sticky bit (or of all but the most significant already discarded bits) |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded or I is true |
unsigned int half_float::detail::float2half | ( | T | value | ) |
Convert floating-point to half-precision.
R | rounding mode to use |
T | source type (builtin floating-point type) |
value | floating-point value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::float2half_impl | ( | double | value, |
true_type | |||
) |
Convert IEEE double-precision to half-precision.
R | rounding mode to use |
value | double-precision value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::float2half_impl | ( | float | value, |
true_type | |||
) |
Convert IEEE single-precision to half-precision. Credit for this goes to Jeroen van der Zijp.
R | rounding mode to use |
value | single-precision value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::float2half_impl | ( | T | value, |
... | |||
) |
Convert non-IEEE floating-point to half-precision.
R | rounding mode to use |
T | source type (builtin floating-point type) |
value | floating-point value to convert |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::gamma | ( | unsigned int | arg | ) |
Gamma function and postprocessing. This approximates the value of either the gamma function or its logarithm directly in Q1.31.
R | rounding mode to use |
L | true for logarithm of gamma function, false for gamma function |
arg | half-precision floating-point value |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if arg is not a positive integer |
T half_float::detail::half2float | ( | unsigned int | value | ) |
|
inline |
|
inline |
Convert half-precision to IEEE single-precision. Credit for this goes to Jeroen van der Zijp.
value | half-precision value to convert |
T half_float::detail::half2float_impl | ( | unsigned int | value, |
T | , | ||
... | |||
) |
T half_float::detail::half2int | ( | unsigned int | value | ) |
Convert half-precision floating-point to integer.
R | rounding mode to use |
E | true for round to even, false for round away from zero |
I | true to raise INEXACT exception (if inexact), false to never raise it |
T | type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits) |
value | half-precision value to convert |
FE_INVALID | if value is not representable in type T |
FE_INEXACT | if value had to be rounded and I is true |
|
inline |
unsigned int half_float::detail::hypot_post | ( | uint32 | r, |
int | exp | ||
) |
Hypotenuse square root and postprocessing.
R | rounding mode to use |
r | mantissa as Q2.30 |
exp | biased exponent |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::int2half | ( | T | value | ) |
Convert integer to half-precision floating-point.
R | rounding mode to use |
T | type to convert (builtin integer type) |
value | integral value to convert |
FE_OVERFLOW | on overflows |
FE_INEXACT | if value had to be rounded |
unsigned int half_float::detail::integral | ( | unsigned int | value | ) |
Round half-precision number to nearest integer value.
R | rounding mode to use |
E | true for round to even, false for round away from zero |
I | true to raise INEXACT exception (if inexact), false to never raise it |
value | half-precision value to round |
FE_INVALID | for signaling NaN |
FE_INEXACT | if value had to be rounded and I is true |
|
inline |
|
inline |
unsigned int half_float::detail::log2_post | ( | uint32 | m, |
int | ilog, | ||
int | exp, | ||
unsigned int | sign = 0 |
||
) |
Postprocessing for binary logarithm.
R | rounding mode to use |
L | logarithm for base transformation as Q1.31 |
m | fractional part of logarithm as Q0.31 |
ilog | signed integer part of logarithm |
exp | biased exponent of result |
sign | sign bit of result |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
unsigned int half_float::detail::mod | ( | unsigned int | x, |
unsigned int | y, | ||
int * | quo = NULL |
||
) |
Half precision positive modulus.
Q | true to compute full quotient, false else |
R | true to compute signed remainder, false for positive remainder |
x | first operand as positive finite half-precision value |
y | second operand as positive finite half-precision value |
quo | address to store quotient at, nullptr if Q false |
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow | ( | unsigned int | sign = 0 | ) |
|
inline |
|
inline |
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded | ( | unsigned int | value, |
int | g, | ||
int | s | ||
) |
Round half-precision number.
R | rounding mode to use |
I | true to always raise INEXACT exception, false to raise only for rounded results |
value | finite half-precision number to round |
g | guard bit (most significant discarded bit) |
s | sticky bit (or of all but the most significant discarded bits) |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if value had to be rounded or I is true |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
uint32 half_float::detail::sqrt | ( | uint32 & | r, |
int & | exp | ||
) |
unsigned int half_float::detail::tangent_post | ( | uint32 | my, |
uint32 | mx, | ||
int | exp, | ||
unsigned int | sign = 0 |
||
) |
Division and postprocessing for tangents.
R | rounding mode to use |
my | dividend as Q1.31 |
mx | divisor as Q1.31 |
exp | biased exponent of result |
sign | sign bit of result |
FE_OVERFLOW | on overflows |
FE_UNDERFLOW | on underflows |
FE_INEXACT | if no other exception occurred |
HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow | ( | unsigned int | sign = 0 | ) |