half.hpp File Reference
#include <utility>
#include <algorithm>
#include <istream>
#include <ostream>
#include <limits>
#include <stdexcept>
#include <climits>
#include <cmath>
#include <cstring>
#include <cstdlib>

Go to the source code of this file.


struct  conditional< bool, T, typename >
 Conditional type. More...
struct  conditional< false, T, F >
struct  bool_type< bool >
 Helper for tag dispatching. More...
struct  is_float< typename >
 Type traits for floating-point types. More...
struct  is_float< const T >
struct  is_float< volatile T >
struct  is_float< const volatile T >
struct  is_float< float >
struct  is_float< double >
struct  is_float< long double >
struct  bits< T >
 Type traits for floating-point bits. More...
struct  bits< const T >
struct  bits< volatile T >
struct  bits< const volatile T >
struct  bits< float >
 Unsigned integer of (at least) 32 bits width. More...
struct  bits< double >
 Unsigned integer of (at least) 64 bits width. More...
struct  binary_t
 Tag type for binary construction. More...
struct  f31
 Class for 1.31 unsigned floating-point computation. More...
class  half
struct  half_caster< T, U, R >
struct  half_caster< half, U, R >
struct  half_caster< T, half, R >
struct  half_caster< half, half, R >
class  numeric_limits< half_float::half >


namespace  half_float
namespace  std
 Extensions to the C++ standard library.


#define HALF_GCC_VERSION   (__GNUC__*100+__GNUC_MINOR__)
#define HALF_ICC_VERSION   0
#define HALF_UNUSED_NOERR(name)
#define HALF_CONSTEXPR_CONST   const
#define HALF_NOTHROW   throw()
#define HALF_THREAD_LOCAL   static
#define HALF_ROUND_STYLE   1
#define HUGE_VALH   std::numeric_limits<half_float::half>::infinity()
#define FP_FAST_FMAH   1
#define FP_ILOGB0   INT_MIN
#define FP_SUBNORMAL   0
#define FP_ZERO   1
#define FP_NAN   2
#define FP_INFINITE   3
#define FP_NORMAL   4
#define FE_INVALID   0x10
#define FE_DIVBYZERO   0x08
#define FE_OVERFLOW   0x04
#define FE_UNDERFLOW   0x02
#define FE_INEXACT   0x01


typedef bool_type< true > true_type
typedef bool_type< false > false_type
typedef unsigned short uint16
 Unsigned integer of (at least) 16 bits width. More...
typedef unsigned long uint32
 Fastest unsigned integer of (at least) 32 bits width. More...
typedef long int32
 Fastest signed integer of (at least) 32 bits width. More...


Implementation defined classification and arithmetic
template<typename T >
bool builtin_isinf (T arg)
template<typename T >
bool builtin_isnan (T arg)
template<typename T >
bool builtin_signbit (T arg)
uint32 sign_mask (uint32 arg)
uint32 arithmetic_shift (uint32 arg, int i)
Error handling
int & errflags ()
void raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)
HALF_CONSTEXPR_NOERR bool compsignal (unsigned int x, unsigned int y)
HALF_CONSTEXPR_NOERR unsigned int signal (unsigned int nan)
HALF_CONSTEXPR_NOERR unsigned int signal (unsigned int x, unsigned int y)
HALF_CONSTEXPR_NOERR unsigned int signal (unsigned int x, unsigned int y, unsigned int z)
HALF_CONSTEXPR_NOERR unsigned int select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
HALF_CONSTEXPR_NOERR unsigned int invalid ()
HALF_CONSTEXPR_NOERR unsigned int pole (unsigned int sign=0)
HALF_CONSTEXPR_NOERR unsigned int check_underflow (unsigned int arg)
Conversion and rounding
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int overflow (unsigned int sign=0)
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int underflow (unsigned int sign=0)
template<std::float_round_style R, bool I>
HALF_CONSTEXPR_NOERR unsigned int rounded (unsigned int value, int g, int s)
template<std::float_round_style R, bool E, bool I>
unsigned int integral (unsigned int value)
template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0)
template<std::float_round_style R>
unsigned int float2half_impl (float value, true_type)
template<std::float_round_style R>
unsigned int float2half_impl (double value, true_type)
template<std::float_round_style R, typename T >
unsigned int float2half_impl (T value,...)
template<std::float_round_style R, typename T >
unsigned int float2half (T value)
template<std::float_round_style R, typename T >
unsigned int int2half (T value)
float half2float_impl (unsigned int value, float, true_type)
double half2float_impl (unsigned int value, double, true_type)
template<typename T >
T half2float_impl (unsigned int value, T,...)
template<typename T >
T half2float (unsigned int value)
template<std::float_round_style R, bool E, bool I, typename T >
T half2int (unsigned int value)
template<std::float_round_style R>
uint32 mulhi (uint32 x, uint32 y)
uint32 multiply64 (uint32 x, uint32 y)
uint32 divide64 (uint32 x, uint32 y, int &s)
template<bool Q, bool R>
unsigned int mod (unsigned int x, unsigned int y, int *quo=NULL)
template<unsigned int F>
uint32 sqrt (uint32 &r, int &exp)
uint32 exp2 (uint32 m, unsigned int n=32)
uint32 log2 (uint32 m, unsigned int n=32)
std::pair< uint32, uint32 > sincos (uint32 mz, unsigned int n=31)
uint32 atan2 (uint32 my, uint32 mx, unsigned int n=31)
uint32 angle_arg (unsigned int abs, int &k)
std::pair< uint32, uint32 > atan2_args (unsigned int abs)
std::pair< uint32, uint32 > hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32)
template<std::float_round_style R>
unsigned int exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0, unsigned int n=32)
template<std::float_round_style R, uint32 L>
unsigned int log2_post (uint32 m, int ilog, int exp, unsigned int sign=0)
template<std::float_round_style R>
unsigned int hypot_post (uint32 r, int exp)
template<std::float_round_style R>
unsigned int tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0)
template<std::float_round_style R, bool S>
unsigned int area (unsigned int arg)
template<std::float_round_style R, bool C>
unsigned int erf (unsigned int arg)
template<std::float_round_style R, bool L>
unsigned int gamma (unsigned int arg)
Comparison operators

HALF_CONSTEXPR_NOERR bool operator== (half x, half y)
HALF_CONSTEXPR_NOERR bool operator!= (half x, half y)
HALF_CONSTEXPR_NOERR bool operator< (half x, half y)
HALF_CONSTEXPR_NOERR bool operator> (half x, half y)
HALF_CONSTEXPR_NOERR bool operator<= (half x, half y)
HALF_CONSTEXPR_NOERR bool operator>= (half x, half y)
Arithmetic operators

HALF_CONSTEXPR half operator+ (half arg)
HALF_CONSTEXPR half operator- (half arg)
half operator+ (half x, half y)
half operator- (half x, half y)
half operator* (half x, half y)
half operator/ (half x, half y)
Input and output

template<typename charT , typename traits >
std::basic_ostream< charT, traits > & operator<< (std::basic_ostream< charT, traits > &out, half arg)
template<typename charT , typename traits >
std::basic_istream< charT, traits > & operator>> (std::basic_istream< charT, traits > &in, half &arg)
Basic mathematical operations

HALF_CONSTEXPR half fabs (half arg)
HALF_CONSTEXPR half abs (half arg)
half fmod (half x, half y)
half remainder (half x, half y)
half remquo (half x, half y, int *quo)
half fma (half x, half y, half z)
HALF_CONSTEXPR_NOERR half fmax (half x, half y)
HALF_CONSTEXPR_NOERR half fmin (half x, half y)
half fdim (half x, half y)
half nanh (const char *arg)
Exponential functions

half exp (half arg)
half exp2 (half arg)
half expm1 (half arg)
half log (half arg)
half log10 (half arg)
half log2 (half arg)
half log1p (half arg)
Power functions

half sqrt (half arg)
half rsqrt (half arg)
half cbrt (half arg)
half hypot (half x, half y)
half hypot (half x, half y, half z)
half pow (half x, half y)
Trigonometric functions

void sincos (half arg, half *sin, half *cos)
half sin (half arg)
half cos (half arg)
half tan (half arg)
half asin (half arg)
half acos (half arg)
half atan (half arg)
half atan2 (half y, half x)
Hyperbolic functions

half sinh (half arg)
half cosh (half arg)
half tanh (half arg)
half asinh (half arg)
half acosh (half arg)
half atanh (half arg)
Error and gamma functions

half erf (half arg)
half erfc (half arg)
half lgamma (half arg)
half tgamma (half arg)

half ceil (half arg)
half floor (half arg)
half trunc (half arg)
half round (half arg)
long lround (half arg)
half rint (half arg)
long lrint (half arg)
half nearbyint (half arg)
Floating point manipulation

half frexp (half arg, int *exp)
half scalbln (half arg, long exp)
half scalbn (half arg, int exp)
half ldexp (half arg, int exp)
half modf (half arg, half *iptr)
int ilogb (half arg)
half logb (half arg)
half nextafter (half from, half to)
half nexttoward (half from, long double to)
HALF_CONSTEXPR half copysign (half x, half y)
Floating point classification

HALF_CONSTEXPR int fpclassify (half arg)
HALF_CONSTEXPR bool isfinite (half arg)
HALF_CONSTEXPR bool isinf (half arg)
HALF_CONSTEXPR bool isnan (half arg)
HALF_CONSTEXPR bool isnormal (half arg)
HALF_CONSTEXPR bool signbit (half arg)

HALF_CONSTEXPR bool isgreater (half x, half y)
HALF_CONSTEXPR bool isgreaterequal (half x, half y)
HALF_CONSTEXPR bool isless (half x, half y)
HALF_CONSTEXPR bool islessequal (half x, half y)
HALF_CONSTEXPR bool islessgreater (half x, half y)
HALF_CONSTEXPR bool isunordered (half x, half y)

template<typename T , typename U >
half_cast (U arg)
template<typename T , std::float_round_style R, typename U >
half_cast (U arg)
Error handling

int feclearexcept (int excepts)
int fetestexcept (int excepts)
int feraiseexcept (int excepts)
int fegetexceptflag (int *flagp, int excepts)
int fesetexceptflag (const int *flagp, int excepts)
void fethrowexcept (int excepts, const char *msg="")


HALF_CONSTEXPR_CONST binary_t binary = binary_t()
 Tag for binary construction. More...

Detailed Description

Main header file for half-precision functionality.

Definition in file half.hpp.

Macro Definition Documentation



Definition at line 426 of file half.hpp.


#define FE_DIVBYZERO   0x08

Definition at line 422 of file half.hpp.


#define FE_INEXACT   0x01

Definition at line 425 of file half.hpp.


#define FE_INVALID   0x10

Definition at line 421 of file half.hpp.


#define FE_OVERFLOW   0x04

Definition at line 423 of file half.hpp.


#define FE_UNDERFLOW   0x02

Definition at line 424 of file half.hpp.


#define FP_FAST_FMAH   1

Fast half-precision fma function. This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate half-precision multiplication followed by an addition, which is always the case.

See also: Documentation for FP_FAST_FMA

Definition at line 389 of file half.hpp.


#define FP_ILOGB0   INT_MIN

Definition at line 399 of file half.hpp.



Definition at line 402 of file half.hpp.


#define FP_INFINITE   3

Definition at line 414 of file half.hpp.


#define FP_NAN   2

Definition at line 411 of file half.hpp.


#define FP_NORMAL   4

Definition at line 417 of file half.hpp.


#define FP_SUBNORMAL   0

Definition at line 405 of file half.hpp.


#define FP_ZERO   1

Definition at line 408 of file half.hpp.



Definition at line 223 of file half.hpp.


#define HALF_CONSTEXPR_CONST   const

Definition at line 224 of file half.hpp.



Definition at line 225 of file half.hpp.



Enable F16C instruction set intrinsics. Defining this to 1 enables the use of F16C compiler intrinsics for converting between half-precision and single-precision values, which may result in improved performance. This will not perform additional checks for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.

Unless predefined it will be enabled automatically when the __F16C__ symbol is defined, which some compilers do on supporting platforms.

Definition at line 278 of file half.hpp.



Definition at line 205 of file half.hpp.



Raise INEXACT exception on overflow. Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the overflow exception.

Definition at line 342 of file half.hpp.



Raise INEXACT exception on underflow. Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the underflow exception.

Note: This will actually cause underflow (and the accompanying inexact) exceptions to be raised only when the result is inexact, while if disabled bare underflow errors will be raised for any (possibly exact) subnormal result.

Definition at line 352 of file half.hpp.


#define HALF_GCC_VERSION   (__GNUC__*100+__GNUC_MINOR__)

Definition at line 25 of file half.hpp.


#define HALF_ICC_VERSION   0

Definition at line 34 of file half.hpp.



Definition at line 233 of file half.hpp.


#define HALF_NOTHROW   throw()

Definition at line 234 of file half.hpp.


#define HALF_ROUND_STYLE   1

Default rounding mode. This specifies the rounding mode used for all conversions between halfs and more precise types (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of std::float_round_style:

std::float_round_style value rounding
std::round_indeterminate -1 fastest
std::round_toward_zero 0 toward zero
std::round_to_nearest 1 to nearest (default)
std::round_toward_infinity 2 toward positive infinity
std::round_toward_neg_infinity 3 toward negative infinity

By default this is set to 1 (std::round_to_nearest), which rounds results to the nearest representable value. It can even be set to std::numeric_limits<float>::round_style to synchronize the rounding mode with that of the built-in single-precision implementation (which is likely std::round_to_nearest, though).

Definition at line 374 of file half.hpp.


#define HALF_THREAD_LOCAL   static

Definition at line 241 of file half.hpp.


#define HALF_UNUSED_NOERR (   name)

Definition at line 210 of file half.hpp.



Half rounding mode. In correspondence with FLT_ROUNDS from <cfloat> this symbol expands to the rounding mode used for half-precision operations. It is an alias for HALF_ROUND_STYLE.

See also: Documentation for FLT_ROUNDS

Definition at line 396 of file half.hpp.


#define HUGE_VALH   std::numeric_limits<half_float::half>::infinity()

Value signaling overflow. In correspondence with HUGE_VAL[F|L] from <cmath> this symbol expands to a positive value signaling the overflow of an operation, in particular it just evaluates to positive infinity.

See also: Documentation for HUGE_VAL

Definition at line 382 of file half.hpp.

Typedef Documentation

◆ false_type

typedef bool_type<false> false_type

Definition at line 472 of file half.hpp.

◆ int32

typedef long int32

Fastest signed integer of (at least) 32 bits width.

Definition at line 513 of file half.hpp.

◆ true_type

typedef bool_type<true> true_type

Definition at line 471 of file half.hpp.

◆ uint16

typedef unsigned short uint16

Unsigned integer of (at least) 16 bits width.

Definition at line 507 of file half.hpp.

◆ uint32

typedef unsigned long uint32

Fastest unsigned integer of (at least) 32 bits width.

Definition at line 510 of file half.hpp.

Function Documentation

◆ angle_arg()

uint32 half_float::detail::angle_arg ( unsigned int  abs,
int &  k 

Reduce argument for trigonometric functions.

abshalf-precision floating-point value
kvalue to take quarter period
abs reduced to [-pi/4,pi/4] as Q0.30

Definition at line 1604 of file half.hpp.

◆ area()

unsigned int half_float::detail::area ( unsigned int  arg)

Area function and postprocessing. This computes the value directly in Q2.30 using the representations asinh(x) = log(x + sqrt(x^2 + 1)) and acosh(x) = log(x + sqrt(x^2 - 1)).

Template Parameters
Rrounding mode to use
Strue for asinh, false for acosh
arghalf-precision argument
asinh|acosh(arg) converted to half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif no other exception occurred

Definition at line 1788 of file half.hpp.

◆ arithmetic_shift()

uint32 half_float::detail::arithmetic_shift ( uint32  arg,
int  i 

Platform-independent arithmetic right shift.

arginteger value in two's complement
ishift amount (at most 31)
arg right shifted for i bits with possible sign extension

Definition at line 605 of file half.hpp.

◆ atan2()

uint32 half_float::detail::atan2 ( uint32  my,
uint32  mx,
unsigned int  n = 31 

Fixed point arc tangent. This uses the CORDIC algorithm in vectoring mode.

myy coordinate as Q0.30
mxx coordinate as Q0.30
nnumber of iterations (at most 31)
arc tangent of my / mx as Q1.30

Definition at line 1582 of file half.hpp.

◆ atan2_args()

std::pair< uint32, uint32 > half_float::detail::atan2_args ( unsigned int  abs)

Get arguments for atan2 function.

abshalf-precision floating-point value
abs and sqrt(1 - abs^2) as Q0.30

Definition at line 1627 of file half.hpp.

◆ builtin_isinf()

bool half_float::detail::builtin_isinf ( T  arg)

Check for infinity.

Template Parameters
Targument type (builtin floating-point type)
argvalue to query
Return values
trueif infinity

Definition at line 546 of file half.hpp.

◆ builtin_isnan()

bool half_float::detail::builtin_isnan ( T  arg)

Check for NaN.

Template Parameters
Targument type (builtin floating-point type)
argvalue to query
Return values
trueif not a number

Definition at line 562 of file half.hpp.

◆ builtin_signbit()

bool half_float::detail::builtin_signbit ( T  arg)

Check sign.

Template Parameters
Targument type (builtin floating-point type)
argvalue to query
Return values
trueif signbit set

Definition at line 578 of file half.hpp.

◆ check_underflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::check_underflow ( unsigned int  arg)

Check value for underflow.

argnon-zero half-precision value to check
FE_UNDERFLOWif arg is subnormal

Definition at line 767 of file half.hpp.

◆ compsignal()

HALF_CONSTEXPR_NOERR bool half_float::detail::compsignal ( unsigned int  x,
unsigned int  y 

Check and signal for any NaN.

xfirst half-precision value to check
ysecond half-precision value to check
Return values
trueif either x or y is NaN
FE_INVALIDif x or y is NaN

Definition at line 679 of file half.hpp.

◆ divide64()

uint32 half_float::detail::divide64 ( uint32  x,
uint32  y,
int &  s 

64-bit division.

x upper 32 bit of dividend
y divisor
s variable to store sticky bit for rounding
(x << 32) / y

Definition at line 1380 of file half.hpp.

◆ erf()

unsigned int half_float::detail::erf ( unsigned int  arg)

Error function and postprocessing. This computes the value directly in Q1.31 using the approximations given here.

Template Parameters
Rrounding mode to use
C true for complementary error function, false else
arghalf-precision function argument
approximated value of error function in half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif no other exception occurred

Definition at line 1918 of file half.hpp.

◆ errflags()

int & half_float::detail::errflags ( )

Internal exception flags.

reference to global exception flags

Definition at line 620 of file half.hpp.

◆ exp2()

uint32 half_float::detail::exp2 ( uint32  m,
unsigned int  n = 32 

Fixed point binary exponential. This uses the BKM algorithm in E-mode.

mexponent in [0,1) as Q0.31
nnumber of iterations (at most 32)
2 ^ m as Q1.31

Definition at line 1504 of file half.hpp.

◆ exp2_post()

unsigned int half_float::detail::exp2_post ( uint32  m,
int  exp,
bool  esign,
unsigned int  sign = 0,
unsigned int  n = 32 

Postprocessing for binary exponential.

Template Parameters
Rrounding mode to use
m fractional part of exponent as Q0.31
expabsolute value of unbiased exponent
esignsign of actual exponent
signsign bit of result
nnumber of BKM iterations (at most 32)
value converted to half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded or I is true

Definition at line 1688 of file half.hpp.

◆ fixed2half()

unsigned int half_float::detail::fixed2half ( uint32  m,
int  exp = 14,
unsigned int  sign = 0,
int  s = 0 

Convert fixed point to half-precision floating-point.

Template Parameters
Rrounding mode to use
Fnumber of fractional bits in [11,31]
Strue for signed, false for unsigned
Ntrue for additional normalization step, false if already normalized to type(1)
Itrue to always raise INEXACT exception, false to raise only for rounded results
mmantissa in Q1.F fixed point format
expbiased exponent - 1
signhalf-precision value with sign bit only
ssticky bit (or of all but the most significant already discarded bits)
value converted to half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded or I is true

Definition at line 884 of file half.hpp.

◆ float2half()

unsigned int half_float::detail::float2half ( T  value)

Convert floating-point to half-precision.

Template Parameters
Rrounding mode to use
Tsource type (builtin floating-point type)
valuefloating-point value to convert
rounded half-precision value
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded

Definition at line 1064 of file half.hpp.

◆ float2half_impl() [1/3]

unsigned int half_float::detail::float2half_impl ( double  value,

Convert IEEE double-precision to half-precision.

Template Parameters
Rrounding mode to use
valuedouble-precision value to convert
rounded half-precision value
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded

Definition at line 995 of file half.hpp.

◆ float2half_impl() [2/3]

unsigned int half_float::detail::float2half_impl ( float  value,

Convert IEEE single-precision to half-precision. Credit for this goes to Jeroen van der Zijp.

Template Parameters
Rrounding mode to use
valuesingle-precision value to convert
rounded half-precision value
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded

Definition at line 907 of file half.hpp.

◆ float2half_impl() [3/3]

unsigned int half_float::detail::float2half_impl ( T  value,

Convert non-IEEE floating-point to half-precision.

Template Parameters
Rrounding mode to use
Tsource type (builtin floating-point type)
valuefloating-point value to convert
rounded half-precision value
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded

Definition at line 1031 of file half.hpp.

◆ gamma()

unsigned int half_float::detail::gamma ( unsigned int  arg)

Gamma function and postprocessing. This approximates the value of either the gamma function or its logarithm directly in Q1.31.

Template Parameters
Rrounding mode to use
L true for logarithm of gamma function, false for gamma function
arghalf-precision floating-point value
lgamma/tgamma(arg) in half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif arg is not a positive integer

Definition at line 1937 of file half.hpp.

◆ half2float()

T half_float::detail::half2float ( unsigned int  value)

Convert half-precision to floating-point.

Template Parameters
T type to convert to (builtin floating-point type)
valuehalf-precision value to convert
floating-point value

Definition at line 1303 of file half.hpp.

◆ half2float_impl() [1/3]

double half_float::detail::half2float_impl ( unsigned int  value,
double  ,

Convert half-precision to IEEE double-precision.

valuehalf-precision value to convert
double-precision value

Definition at line 1259 of file half.hpp.

◆ half2float_impl() [2/3]

float half_float::detail::half2float_impl ( unsigned int  value,
float  ,

Convert half-precision to IEEE single-precision. Credit for this goes to Jeroen van der Zijp.

valuehalf-precision value to convert
single-precision value

Definition at line 1096 of file half.hpp.

◆ half2float_impl() [3/3]

T half_float::detail::half2float_impl ( unsigned int  value,

Convert half-precision to non-IEEE floating-point.

Template Parameters
T type to convert to (builtin floating-point type)
valuehalf-precision value to convert
floating-point value

Definition at line 1283 of file half.hpp.

◆ half2int()

T half_float::detail::half2int ( unsigned int  value)

Convert half-precision floating-point to integer.

Template Parameters
Rrounding mode to use
Etrue for round to even, false for round away from zero
Itrue to raise INEXACT exception (if inexact), false to never raise it
T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
valuehalf-precision value to convert
rounded integer value
FE_INVALIDif value is not representable in type T
FE_INEXACTif value had to be rounded and I is true

Definition at line 1317 of file half.hpp.

◆ hyperbolic_args()

std::pair< uint32, uint32 > half_float::detail::hyperbolic_args ( unsigned int  abs,
int &  exp,
unsigned int  n = 32 

Get exponentials for hyperbolic computation.

abshalf-precision floating-point value
expvariable to take unbiased exponent of larger result
nnumber of BKM iterations (at most 32)
exp(abs) and exp(-abs) as Q1.31 with same exponent

Definition at line 1650 of file half.hpp.

◆ hypot_post()

unsigned int half_float::detail::hypot_post ( uint32  r,
int  exp 

Hypotenuse square root and postprocessing.

Template Parameters
Rrounding mode to use
rmantissa as Q2.30
expbiased exponent
square root converted to half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded

Definition at line 1745 of file half.hpp.

◆ int2half()

unsigned int half_float::detail::int2half ( T  value)

Convert integer to half-precision floating-point.

Template Parameters
Rrounding mode to use
Ttype to convert (builtin integer type)
valueintegral value to convert
rounded half-precision value
FE_OVERFLOWon overflows
FE_INEXACTif value had to be rounded

Definition at line 1076 of file half.hpp.

◆ integral()

unsigned int half_float::detail::integral ( unsigned int  value)

Round half-precision number to nearest integer value.

Template Parameters
Rrounding mode to use
Etrue for round to even, false for round away from zero
Itrue to raise INEXACT exception (if inexact), false to never raise it
valuehalf-precision value to round
half-precision bits for nearest integral value
FE_INVALIDfor signaling NaN
FE_INEXACTif value had to be rounded and I is true

Definition at line 849 of file half.hpp.

◆ invalid()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::invalid ( )

Raise domain error and return NaN. Returns a quiet NaN.


Definition at line 743 of file half.hpp.

◆ log2()

uint32 half_float::detail::log2 ( uint32  m,
unsigned int  n = 32 

Fixed point binary logarithm. This uses the BKM algorithm in L-mode.

mmantissa in [1,2) as Q1.30
nnumber of iterations (at most 32)
log2(m) as Q0.31

Definition at line 1531 of file half.hpp.

◆ log2_post()

unsigned int half_float::detail::log2_post ( uint32  m,
int  ilog,
int  exp,
unsigned int  sign = 0 

Postprocessing for binary logarithm.

Template Parameters
Rrounding mode to use
Llogarithm for base transformation as Q1.31
mfractional part of logarithm as Q0.31
ilogsigned integer part of logarithm
expbiased exponent of result
signsign bit of result
value base-transformed and converted to half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif no other exception occurred

Definition at line 1720 of file half.hpp.

◆ mod()

unsigned int half_float::detail::mod ( unsigned int  x,
unsigned int  y,
int *  quo = NULL 

Half precision positive modulus.

Template Parameters
Qtrue to compute full quotient, false else
Rtrue to compute signed remainder, false for positive remainder
xfirst operand as positive finite half-precision value
ysecond operand as positive finite half-precision value
quo address to store quotient at, nullptr if Q false
modulus of x / y

Definition at line 1409 of file half.hpp.

◆ mulhi()

uint32 half_float::detail::mulhi ( uint32  x,
uint32  y 

Upper part of 64-bit multiplication.

Template Parameters
Rrounding mode to use
xfirst factor
ysecond factor
upper 32 bit of x * y

Definition at line 1355 of file half.hpp.

◆ multiply64()

uint32 half_float::detail::multiply64 ( uint32  x,
uint32  y 

64-bit multiplication.

xfirst factor
ysecond factor
upper 32 bit of x * y rounded to nearest

Definition at line 1366 of file half.hpp.

◆ overflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow ( unsigned int  sign = 0)

Half-precision overflow.

Template Parameters
Rrounding mode to use
signhalf-precision value with sign bit only
rounded overflowing half-precision value

Definition at line 784 of file half.hpp.

◆ pole()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::pole ( unsigned int  sign = 0)

Raise pole error and return infinity.

signhalf-precision value with sign bit only
half-precision infinity with sign of sign

Definition at line 755 of file half.hpp.

◆ raise()

void half_float::detail::raise ( int  HALF_UNUSED_NOERR(flags),
bool  HALF_UNUSED_NOERR(cond) = true 

Raise floating-point exception.

flagsexceptions to raise
condcondition to raise exceptions for

Definition at line 625 of file half.hpp.

◆ rounded()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded ( unsigned int  value,
int  g,
int  s 

Round half-precision number.

Template Parameters
Rrounding mode to use
Itrue to always raise INEXACT exception, false to raise only for rounded results
valuefinite half-precision number to round
gguard bit (most significant discarded bit)
ssticky bit (or of all but the most significant discarded bits)
rounded half-precision value
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif value had to be rounded or I is true

Definition at line 820 of file half.hpp.

◆ select()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::select ( unsigned int  x,
unsigned int  HALF_UNUSED_NOERR(y) 

Select value or signaling NaN.

xpreferred half-precision value
yignored half-precision value except for signaling NaN
y if signaling NaN, x otherwise
FE_INVALIDif y is signaling NaN

Definition at line 731 of file half.hpp.

◆ sign_mask()

uint32 half_float::detail::sign_mask ( uint32  arg)

Platform-independent sign mask.

arginteger value in two's complement
Return values
-1if arg negative
0if arg positive

Definition at line 591 of file half.hpp.

◆ signal() [1/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  nan)

Signal and silence signaling NaN.

nanhalf-precision NaN value
quiet NaN
FE_INVALIDif nan is signaling NaN

Definition at line 691 of file half.hpp.

◆ signal() [2/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  x,
unsigned int  y 

Signal and silence signaling NaNs.

xfirst half-precision value to check
ysecond half-precision value to check
quiet NaN
FE_INVALIDif x or y is signaling NaN

Definition at line 704 of file half.hpp.

◆ signal() [3/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int  x,
unsigned int  y,
unsigned int  z 

Signal and silence signaling NaNs.

xfirst half-precision value to check
ysecond half-precision value to check
zthird half-precision value to check
quiet NaN
FE_INVALIDif x, y or z is signaling NaN

Definition at line 718 of file half.hpp.

◆ sincos()

std::pair< uint32, uint32 > half_float::detail::sincos ( uint32  mz,
unsigned int  n = 31 

Fixed point sine and cosine. This uses the CORDIC algorithm in rotation mode.

mzangle in [-pi/2,pi/2] as Q1.30
nnumber of iterations (at most 31)
sine and cosine of mz as Q1.30

Definition at line 1558 of file half.hpp.

◆ sqrt()

uint32 half_float::detail::sqrt ( uint32 &  r,
int &  exp 

Fixed point square root.

Template Parameters
Fnumber of fractional bits
rradicand in Q1.F fixed point format
square root as Q1.F/2

Definition at line 1480 of file half.hpp.

◆ tangent_post()

unsigned int half_float::detail::tangent_post ( uint32  my,
uint32  mx,
int  exp,
unsigned int  sign = 0 

Division and postprocessing for tangents.

Template Parameters
Rrounding mode to use
mydividend as Q1.31
mxdivisor as Q1.31
expbiased exponent of result
signsign bit of result
quotient converted to half-precision
FE_OVERFLOWon overflows
FE_UNDERFLOWon underflows
FE_INEXACTif no other exception occurred

Definition at line 1767 of file half.hpp.

◆ underflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow ( unsigned int  sign = 0)

Half-precision underflow.

Template Parameters
Rrounding mode to use
signhalf-precision value with sign bit only
rounded underflowing half-precision value

Definition at line 800 of file half.hpp.

Variable Documentation

◆ binary

HALF_CONSTEXPR_CONST binary_t binary = binary_t()

Tag for binary construction.

Definition at line 536 of file half.hpp.