#include <utility>
#include <algorithm>
#include <istream>
#include <ostream>
#include <limits>
#include <stdexcept>
#include <climits>
#include <cmath>
#include <cstring>
#include <cstdlib>

Classes
struct	conditional< bool, T, typename >
	Conditional type. More...

struct	conditional< false, T, F >

struct	bool_type< bool >
	Helper for tag dispatching. More...

struct	is_float< typename >
	Type traits for floating-point types. More...

struct	is_float< const T >

struct	is_float< volatile T >

struct	is_float< const volatile T >

struct	is_float< float >

struct	is_float< double >

struct	is_float< long double >

struct	bits< T >
	Type traits for floating-point bits. More...

struct	bits< const T >

struct	bits< volatile T >

struct	bits< const volatile T >

struct	bits< float >
	Unsigned integer of (at least) 32 bits width. More...

struct	bits< double >
	Unsigned integer of (at least) 64 bits width. More...

struct	binary_t
	Tag type for binary construction. More...

struct	f31
	Class for 1.31 unsigned floating-point computation. More...

class	half

struct	half_caster< T, U, R >

struct	half_caster< half, U, R >

struct	half_caster< T, half, R >

struct	half_caster< half, half, R >

class	numeric_limits< half_float::half >

Namespaces
namespace	half_float

namespace	std
	Extensions to the C++ standard library.

Macros
#define	HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__)

#define	HALF_ICC_VERSION 0

#define	HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)

#define	HALF_UNUSED_NOERR(name)

#define	HALF_CONSTEXPR

#define	HALF_CONSTEXPR_CONST const

#define	HALF_CONSTEXPR_NOERR

#define	HALF_NOEXCEPT

#define	HALF_NOTHROW throw()

#define	HALF_THREAD_LOCAL static

#define	HALF_ENABLE_F16C_INTRINSICS __F16C__

#define	HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1

#define	HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1

#define	HALF_ROUND_STYLE 1

#define	HUGE_VALH std::numeric_limits<half_float::half>::infinity()

#define	FP_FAST_FMAH 1

#define	HLF_ROUNDS HALF_ROUND_STYLE

#define	FP_ILOGB0 INT_MIN

#define	FP_ILOGBNAN INT_MAX

#define	FP_SUBNORMAL 0

#define	FP_ZERO 1

#define	FP_NAN 2

#define	FP_INFINITE 3

#define	FP_NORMAL 4

#define	FE_INVALID 0x10

#define	FE_DIVBYZERO 0x08

#define	FE_OVERFLOW 0x04

#define	FE_UNDERFLOW 0x02

#define	FE_INEXACT 0x01

#define	FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)

Typedefs
typedef bool_type< true >	true_type

typedef bool_type< false >	false_type

typedef unsigned short	uint16
	Unsigned integer of (at least) 16 bits width. More...

typedef unsigned long	uint32
	Fastest unsigned integer of (at least) 32 bits width. More...

typedef long	int32
	Fastest signed integer of (at least) 32 bits width. More...

Functions
Implementation defined classification and arithmetic
template<typename T >
bool	builtin_isinf (T arg)

template<typename T >
bool	builtin_isnan (T arg)

template<typename T >
bool	builtin_signbit (T arg)

uint32	sign_mask (uint32 arg)

uint32	arithmetic_shift (uint32 arg, int i)

Error handling
int &	errflags ()

void	raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)

HALF_CONSTEXPR_NOERR bool	compsignal (unsigned int x, unsigned int y)

HALF_CONSTEXPR_NOERR unsigned int	signal (unsigned int nan)

HALF_CONSTEXPR_NOERR unsigned int	signal (unsigned int x, unsigned int y)

HALF_CONSTEXPR_NOERR unsigned int	signal (unsigned int x, unsigned int y, unsigned int z)

HALF_CONSTEXPR_NOERR unsigned int	select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y))

HALF_CONSTEXPR_NOERR unsigned int	invalid ()

HALF_CONSTEXPR_NOERR unsigned int	pole (unsigned int sign=0)

HALF_CONSTEXPR_NOERR unsigned int	check_underflow (unsigned int arg)

Conversion and rounding
template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int	overflow (unsigned int sign=0)

template<std::float_round_style R>
HALF_CONSTEXPR_NOERR unsigned int	underflow (unsigned int sign=0)

template<std::float_round_style R, bool I>
HALF_CONSTEXPR_NOERR unsigned int	rounded (unsigned int value, int g, int s)

template<std::float_round_style R, bool E, bool I>
unsigned int	integral (unsigned int value)

template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int	fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0)

template<std::float_round_style R>
unsigned int	float2half_impl (float value, true_type)

template<std::float_round_style R>
unsigned int	float2half_impl (double value, true_type)

template<std::float_round_style R, typename T >
unsigned int	float2half_impl (T value,...)

template<std::float_round_style R, typename T >
unsigned int	float2half (T value)

template<std::float_round_style R, typename T >
unsigned int	int2half (T value)

float	half2float_impl (unsigned int value, float, true_type)

double	half2float_impl (unsigned int value, double, true_type)

template<typename T >
T	half2float_impl (unsigned int value, T,...)

template<typename T >
T	half2float (unsigned int value)

template<std::float_round_style R, bool E, bool I, typename T >
T	half2int (unsigned int value)

Mathematics
template<std::float_round_style R>
uint32	mulhi (uint32 x, uint32 y)

uint32	multiply64 (uint32 x, uint32 y)

uint32	divide64 (uint32 x, uint32 y, int &s)

template<bool Q, bool R>
unsigned int	mod (unsigned int x, unsigned int y, int *quo=NULL)

template<unsigned int F>
uint32	sqrt (uint32 &r, int &exp)

uint32	exp2 (uint32 m, unsigned int n=32)

uint32	log2 (uint32 m, unsigned int n=32)

std::pair< uint32, uint32 >	sincos (uint32 mz, unsigned int n=31)

uint32	atan2 (uint32 my, uint32 mx, unsigned int n=31)

uint32	angle_arg (unsigned int abs, int &k)

std::pair< uint32, uint32 >	atan2_args (unsigned int abs)

std::pair< uint32, uint32 >	hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32)

template<std::float_round_style R>
unsigned int	exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0, unsigned int n=32)

template<std::float_round_style R, uint32 L>
unsigned int	log2_post (uint32 m, int ilog, int exp, unsigned int sign=0)

template<std::float_round_style R>
unsigned int	hypot_post (uint32 r, int exp)

template<std::float_round_style R>
unsigned int	tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0)

template<std::float_round_style R, bool S>
unsigned int	area (unsigned int arg)

template<std::float_round_style R, bool C>
unsigned int	erf (unsigned int arg)

template<std::float_round_style R, bool L>
unsigned int	gamma (unsigned int arg)

Comparison operators

HALF_CONSTEXPR_NOERR bool	operator== (half x, half y)

HALF_CONSTEXPR_NOERR bool	operator!= (half x, half y)

HALF_CONSTEXPR_NOERR bool	operator< (half x, half y)

HALF_CONSTEXPR_NOERR bool	operator> (half x, half y)

HALF_CONSTEXPR_NOERR bool	operator<= (half x, half y)

HALF_CONSTEXPR_NOERR bool	operator>= (half x, half y)

Arithmetic operators

HALF_CONSTEXPR half	operator+ (half arg)

HALF_CONSTEXPR half	operator- (half arg)

half	operator+ (half x, half y)

half	operator- (half x, half y)

half	operator* (half x, half y)

half	operator/ (half x, half y)

Input and output

template<typename charT , typename traits >
std::basic_ostream< charT, traits > &	operator<< (std::basic_ostream< charT, traits > &out, half arg)

template<typename charT , typename traits >
std::basic_istream< charT, traits > &	operator>> (std::basic_istream< charT, traits > &in, half &arg)

Basic mathematical operations

HALF_CONSTEXPR half	fabs (half arg)

HALF_CONSTEXPR half	abs (half arg)

half	fmod (half x, half y)

half	remainder (half x, half y)

half	remquo (half x, half y, int *quo)

half	fma (half x, half y, half z)

HALF_CONSTEXPR_NOERR half	fmax (half x, half y)

HALF_CONSTEXPR_NOERR half	fmin (half x, half y)

half	fdim (half x, half y)

half	nanh (const char *arg)

Exponential functions

half	exp (half arg)

half	exp2 (half arg)

half	expm1 (half arg)

half	log (half arg)

half	log10 (half arg)

half	log2 (half arg)

half	log1p (half arg)

Power functions

half	sqrt (half arg)

half	rsqrt (half arg)

half	cbrt (half arg)

half	hypot (half x, half y)

half	hypot (half x, half y, half z)

half	pow (half x, half y)

Trigonometric functions

void	sincos (half arg, half *sin, half *cos)

half	sin (half arg)

half	cos (half arg)

half	tan (half arg)

half	asin (half arg)

half	acos (half arg)

half	atan (half arg)

half	atan2 (half y, half x)

Hyperbolic functions

half	sinh (half arg)

half	cosh (half arg)

half	tanh (half arg)

half	asinh (half arg)

half	acosh (half arg)

half	atanh (half arg)

Error and gamma functions

half	erf (half arg)

half	erfc (half arg)

half	lgamma (half arg)

half	tgamma (half arg)

Rounding

half	ceil (half arg)

half	floor (half arg)

half	trunc (half arg)

half	round (half arg)

long	lround (half arg)

half	rint (half arg)

long	lrint (half arg)

half	nearbyint (half arg)

Floating point manipulation

half	frexp (half arg, int *exp)

half	scalbln (half arg, long exp)

half	scalbn (half arg, int exp)

half	ldexp (half arg, int exp)

half	modf (half arg, half *iptr)

int	ilogb (half arg)

half	logb (half arg)

half	nextafter (half from, half to)

half	nexttoward (half from, long double to)

HALF_CONSTEXPR half	copysign (half x, half y)

Floating point classification

HALF_CONSTEXPR int	fpclassify (half arg)

HALF_CONSTEXPR bool	isfinite (half arg)

HALF_CONSTEXPR bool	isinf (half arg)

HALF_CONSTEXPR bool	isnan (half arg)

HALF_CONSTEXPR bool	isnormal (half arg)

HALF_CONSTEXPR bool	signbit (half arg)

Comparison

HALF_CONSTEXPR bool	isgreater (half x, half y)

HALF_CONSTEXPR bool	isgreaterequal (half x, half y)

HALF_CONSTEXPR bool	isless (half x, half y)

HALF_CONSTEXPR bool	islessequal (half x, half y)

HALF_CONSTEXPR bool	islessgreater (half x, half y)

HALF_CONSTEXPR bool	isunordered (half x, half y)

Casting

template<typename T , typename U >
T	half_cast (U arg)

template<typename T , std::float_round_style R, typename U >
T	half_cast (U arg)

Error handling

int	feclearexcept (int excepts)

int	fetestexcept (int excepts)

int	feraiseexcept (int excepts)

int	fegetexceptflag (int *flagp, int excepts)

int	fesetexceptflag (const int *flagp, int excepts)

void	fethrowexcept (int excepts, const char *msg="")

Variables
HALF_CONSTEXPR_CONST binary_t	binary = binary_t()
	Tag for binary construction. More...

Detailed Description

Main header file for half-precision functionality.

Definition in file half.hpp.

Macro Definition Documentation

◆ FE_ALL_EXCEPT

#define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)

Definition at line 426 of file half.hpp.

◆ FE_DIVBYZERO

#define FE_DIVBYZERO 0x08

Definition at line 422 of file half.hpp.

◆ FE_INEXACT

#define FE_INEXACT 0x01

Definition at line 425 of file half.hpp.

◆ FE_INVALID

#define FE_INVALID 0x10

Definition at line 421 of file half.hpp.

◆ FE_OVERFLOW

#define FE_OVERFLOW 0x04

Definition at line 423 of file half.hpp.

◆ FE_UNDERFLOW

#define FE_UNDERFLOW 0x02

Definition at line 424 of file half.hpp.

◆ FP_FAST_FMAH

#define FP_FAST_FMAH 1

Fast half-precision fma function. This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate half-precision multiplication followed by an addition, which is always the case.

See also: Documentation for FP_FAST_FMA

Definition at line 389 of file half.hpp.

◆ FP_ILOGB0

#define FP_ILOGB0 INT_MIN

Definition at line 399 of file half.hpp.

◆ FP_ILOGBNAN

#define FP_ILOGBNAN INT_MAX

Definition at line 402 of file half.hpp.

◆ FP_INFINITE

#define FP_INFINITE 3

Definition at line 414 of file half.hpp.

◆ FP_NAN

#define FP_NAN 2

Definition at line 411 of file half.hpp.

◆ FP_NORMAL

#define FP_NORMAL 4

Definition at line 417 of file half.hpp.

◆ FP_SUBNORMAL

#define FP_SUBNORMAL 0

Definition at line 405 of file half.hpp.

◆ FP_ZERO

#define FP_ZERO 1

Definition at line 408 of file half.hpp.

◆ HALF_CONSTEXPR

#define HALF_CONSTEXPR

Definition at line 223 of file half.hpp.

◆ HALF_CONSTEXPR_CONST

#define HALF_CONSTEXPR_CONST const

Definition at line 224 of file half.hpp.

◆ HALF_CONSTEXPR_NOERR

#define HALF_CONSTEXPR_NOERR

Definition at line 225 of file half.hpp.

◆ HALF_ENABLE_F16C_INTRINSICS

#define HALF_ENABLE_F16C_INTRINSICS __F16C__

Enable F16C instruction set intrinsics. Defining this to 1 enables the use of F16C compiler intrinsics for converting between half-precision and single-precision values which may result in improved performance. This will not perform additional checks for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.

Unless predefined it will be enabled automatically when the __F16C__ symbol is defined, which some compilers do on supporting platforms.

Definition at line 278 of file half.hpp.

◆ HALF_ERRHANDLING

#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)

Definition at line 205 of file half.hpp.

◆ HALF_ERRHANDLING_OVERFLOW_TO_INEXACT

#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1

Raise INEXACT exception on overflow. Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the overflow exception.

Definition at line 342 of file half.hpp.

◆ HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT

#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1

Raise INEXACT exception on underflow. Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition. These will be raised after any possible handling of the underflow exception.

Note: This will actually cause underflow (and the accompanying inexact) exceptions to be raised only when the result is inexact, while if disabled bare underflow errors will be raised for any (possibly exact) subnormal result.

Definition at line 352 of file half.hpp.

◆ HALF_GCC_VERSION

#define HALF_GCC_VERSION (__GNUC__*100+__GNUC_MINOR__)

Definition at line 25 of file half.hpp.

◆ HALF_ICC_VERSION

#define HALF_ICC_VERSION 0

Definition at line 34 of file half.hpp.

◆ HALF_NOEXCEPT

#define HALF_NOEXCEPT

Definition at line 233 of file half.hpp.

◆ HALF_NOTHROW

#define HALF_NOTHROW throw()

Definition at line 234 of file half.hpp.

◆ HALF_ROUND_STYLE

#define HALF_ROUND_STYLE 1

Default rounding mode. This specifies the rounding mode used for all conversions between halfs and more precise types (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of std::float_round_style:

`std::float_round_style`	value	rounding
`std::round_indeterminate`	-1	fastest
`std::round_toward_zero`	0	toward zero
`std::round_to_nearest`	1	to nearest (default)
`std::round_toward_infinity`	2	toward positive infinity
`std::round_toward_neg_infinity`	3	toward negative infinity

By default this is set to 1 (std::round_to_nearest), which rounds results to the nearest representable value. It can even be set to std::numeric_limits<float>::round_style to synchronize the rounding mode with that of the built-in single-precision implementation (which is likely std::round_to_nearest, though).

Definition at line 374 of file half.hpp.

◆ HALF_THREAD_LOCAL

#define HALF_THREAD_LOCAL static

Definition at line 241 of file half.hpp.

◆ HALF_UNUSED_NOERR

#define HALF_UNUSED_NOERR ( name )

Definition at line 210 of file half.hpp.

◆ HLF_ROUNDS

#define HLF_ROUNDS HALF_ROUND_STYLE

Half rounding mode. In correspondence with FLT_ROUNDS from <cfloat> this symbol expands to the rounding mode used for half-precision operations. It is an alias for HALF_ROUND_STYLE.

See also: Documentation for FLT_ROUNDS

Definition at line 396 of file half.hpp.

◆ HUGE_VALH

#define HUGE_VALH std::numeric_limits<half_float::half>::infinity()

Value signaling overflow. In correspondence with HUGE_VAL[F|L] from <cmath> this symbol expands to a positive value signaling the overflow of an operation, in particular it just evaluates to positive infinity.

See also: Documentation for HUGE_VAL

Definition at line 382 of file half.hpp.

Typedef Documentation

◆ false_type

typedef bool_type<false> false_type

Definition at line 472 of file half.hpp.

◆ int32

typedef long int32

Fastest signed integer of (at least) 32 bits width.

Definition at line 513 of file half.hpp.

◆ true_type

typedef bool_type<true> true_type

Definition at line 471 of file half.hpp.

◆ uint16

typedef unsigned short uint16

Unsigned integer of (at least) 16 bits width.

Definition at line 507 of file half.hpp.

◆ uint32

typedef unsigned long uint32

Fastest unsigned integer of (at least) 32 bits width.

Definition at line 510 of file half.hpp.

Function Documentation

◆ angle_arg()

uint32 half_float::detail::angle_arg	(	unsigned int	abs,
		int &	k
	)

inline

Reduce argument for trigonometric functions.

Parameters

abs	half-precision floating-point value
k	value to take quarter period

Returns: abs reduced to [-pi/4,pi/4] as Q0.30

Definition at line 1604 of file half.hpp.

◆ area()

unsigned int half_float::detail::area ( unsigned int arg )

Area function and postprocessing. This computes the value directly in Q2.30 using the representations asinh(x) = log(x+sqrt(x^2+1)) and acosh(x) = log(x+sqrt(x^2-1)).

Template Parameters

R	rounding mode to use
S	`true` for asinh, `false` for acosh

Parameters

arg	half-precision argument

Returns: asinh|acosh(arg) converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1788 of file half.hpp.

◆ arithmetic_shift()

uint32 half_float::detail::arithmetic_shift	(	uint32	arg,
		int	i
	)

inline

Platform-independent arithmetic right shift.

Parameters

arg	integer value in two's complement
i	shift amount (at most 31)

Returns: arg right shifted for i bits with possible sign extension

Definition at line 605 of file half.hpp.

◆ atan2()

uint32 half_float::detail::atan2	(	uint32	my,
		uint32	mx,
		unsigned int	n = `31`
	)

inline

Fixed point arc tangent. This uses the CORDIC algorithm in vectoring mode.

Parameters

my	y coordinate as Q0.30
mx	x coordinate as Q0.30
n	number of iterations (at most 31)

Returns: arc tangent of my / mx as Q1.30

Definition at line 1582 of file half.hpp.

◆ atan2_args()

std::pair< uint32, uint32 > half_float::detail::atan2_args ( unsigned int abs )

inline

Get arguments for atan2 function.

Parameters

abs	half-precision floating-point value

Returns: abs and sqrt(1 - abs^2) as Q0.30

Definition at line 1627 of file half.hpp.

◆ builtin_isinf()

bool half_float::detail::builtin_isinf ( T arg )

Check for infinity.

Template Parameters

T	argument type (builtin floating-point type)

Parameters

arg	value to query

Return values

true	if infinity
false	else

Definition at line 546 of file half.hpp.

◆ builtin_isnan()

bool half_float::detail::builtin_isnan ( T arg )

Check for NaN.

Template Parameters

T	argument type (builtin floating-point type)

Parameters

arg	value to query

Return values

true	if not a number
false	else

Definition at line 562 of file half.hpp.

◆ builtin_signbit()

bool half_float::detail::builtin_signbit ( T arg )

Check sign.

Template Parameters

T	argument type (builtin floating-point type)

Parameters

arg	value to query

Return values

true	if signbit set
false	else

Definition at line 578 of file half.hpp.

◆ check_underflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::check_underflow ( unsigned int arg )

inline

Check value for underflow.

Parameters

arg	non-zero half-precision value to check

Returns: arg

Exceptions

FE_UNDERFLOW if arg is subnormal

Definition at line 767 of file half.hpp.

◆ compsignal()

HALF_CONSTEXPR_NOERR bool half_float::detail::compsignal	(	unsigned int	x,
		unsigned int	y
	)

inline

Check and signal for any NaN.

Parameters

x	first half-precision value to check
y	second half-precision value to check

Return values

true	if either x or y is NaN
false	else

Exceptions

FE_INVALID if x or y is NaN

Definition at line 679 of file half.hpp.

◆ divide64()

uint32 half_float::detail::divide64	(	uint32	x,
		uint32	y,
		int &	s
	)

inline

64-bit division.

Parameters

x	upper 32 bit of dividend
y	divisor
s	variable to store sticky bit for rounding

Returns: (x << 32) / y

Definition at line 1380 of file half.hpp.

◆ erf()

unsigned int half_float::detail::erf ( unsigned int arg )

Error function and postprocessing. This computes the value directly in Q1.31 using the approximations given here.

Template Parameters

R	rounding mode to use
C	`true` for complementary error function, `false` else

Parameters

arg	half-precision function argument

Returns: approximated value of error function in half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1918 of file half.hpp.

◆ errflags()

int & half_float::detail::errflags ( )

inline

Internal exception flags.

Returns: reference to global exception flags

Definition at line 620 of file half.hpp.

◆ exp2()

uint32 half_float::detail::exp2	(	uint32	m,
		unsigned int	n = `32`
	)

inline

Fixed point binary exponential. This uses the BKM algorithm in E-mode.

Parameters

m	exponent in [0,1) as Q0.31
n	number of iterations (at most 32)

Returns: 2 ^ m as Q1.31

Definition at line 1504 of file half.hpp.

◆ exp2_post()

unsigned int half_float::detail::exp2_post	(	uint32	m,
		int	exp,
		bool	esign,
		unsigned int	sign = `0`,
		unsigned int	n = `32`
	)

Postprocessing for binary exponential.

Template Parameters

R	rounding mode to use

Parameters

m	fractional part of exponent as Q0.31
exp	absolute value of unbiased exponent
esign	sign of actual exponent
sign	sign bit of result
n	number of BKM iterations (at most 32)

Returns: value converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded or I is `true`

Definition at line 1688 of file half.hpp.

◆ fixed2half()

unsigned int half_float::detail::fixed2half	(	uint32	m,
		int	exp = `14`,
		unsigned int	sign = `0`,
		int	s = `0`
	)

Convert fixed point to half-precision floating-point.

Template Parameters

R	rounding mode to use
F	number of fractional bits in [11,31]
S	`true` for signed, `false` for unsigned
N	`true` for additional normalization step, `false` if already normalized to type(1)
I	`true` to always raise INEXACT exception, `false` to raise only for rounded results

Parameters

m	mantissa in Q1.F fixed point format
exp	biased exponent - 1
sign	half-precision value with sign bit only
s	sticky bit (or of all but the most significant already discarded bits)

Returns: value converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded or I is `true`

Definition at line 884 of file half.hpp.

◆ float2half()

unsigned int half_float::detail::float2half ( T value )

Convert floating-point to half-precision.

Template Parameters

R	rounding mode to use
T	source type (builtin floating-point type)

Parameters

value floating-point value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 1064 of file half.hpp.

◆ float2half_impl() [1/3]

unsigned int half_float::detail::float2half_impl	(	double	value,
		true_type
	)

Convert IEEE double-precision to half-precision.

Template Parameters

R	rounding mode to use

Parameters

value double-precision value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 995 of file half.hpp.

◆ float2half_impl() [2/3]

unsigned int half_float::detail::float2half_impl	(	float	value,
		true_type
	)

Convert IEEE single-precision to half-precision. Credit for this goes to Jeroen van der Zijp.

Template Parameters

R	rounding mode to use

Parameters

value single-precision value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 907 of file half.hpp.

◆ float2half_impl() [3/3]

unsigned int half_float::detail::float2half_impl	(	T	value,
			...
	)

Convert non-IEEE floating-point to half-precision.

Template Parameters

R	rounding mode to use
T	source type (builtin floating-point type)

Parameters

value floating-point value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 1031 of file half.hpp.

◆ gamma()

unsigned int half_float::detail::gamma ( unsigned int arg )

Gamma function and postprocessing. This approximates the value of either the gamma function or its logarithm directly in Q1.31.

Template Parameters

R	rounding mode to use
L	`true` for logarithm of gamma function, `false` for gamma function

Parameters

arg	half-precision floating-point value

Returns: lgamma/tgamma(arg) in half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if arg is not a positive integer

Definition at line 1937 of file half.hpp.

◆ half2float()

T half_float::detail::half2float ( unsigned int value )

Convert half-precision to floating-point.

Template Parameters

T	type to convert to (builtin floating-point type)

Parameters

value half-precision value to convert

Returns: floating-point value

Definition at line 1303 of file half.hpp.

◆ half2float_impl() [1/3]

double half_float::detail::half2float_impl	(	unsigned int	value,
		double	,
		true_type
	)

inline

Convert half-precision to IEEE double-precision.

Parameters

value half-precision value to convert

Returns: double-precision value

Definition at line 1259 of file half.hpp.

◆ half2float_impl() [2/3]

float half_float::detail::half2float_impl	(	unsigned int	value,
		float	,
		true_type
	)

inline

Convert half-precision to IEEE single-precision. Credit for this goes to Jeroen van der Zijp.

Parameters

value half-precision value to convert

Returns: single-precision value

Definition at line 1096 of file half.hpp.

◆ half2float_impl() [3/3]

T half_float::detail::half2float_impl	(	unsigned int	value,
		T	,
			...
	)

Convert half-precision to non-IEEE floating-point.

Template Parameters

T	type to convert to (builtin floating-point type)

Parameters

value half-precision value to convert

Returns: floating-point value

Definition at line 1283 of file half.hpp.

◆ half2int()

T half_float::detail::half2int ( unsigned int value )

Convert half-precision floating-point to integer.

Template Parameters

R	rounding mode to use
E	`true` for round to even, `false` for round away from zero
I	`true` to raise INEXACT exception (if inexact), `false` to never raise it
T	type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)

Parameters

value half-precision value to convert

Returns: rounded integer value

Exceptions

FE_INVALID	if value is not representable in type T
FE_INEXACT	if value had to be rounded and I is `true`

Definition at line 1317 of file half.hpp.

◆ hyperbolic_args()

std::pair< uint32, uint32 > half_float::detail::hyperbolic_args	(	unsigned int	abs,
		int &	exp,
		unsigned int	n = `32`
	)

inline

Get exponentials for hyperbolic computation.

Parameters

abs	half-precision floating-point value
exp	variable to take unbiased exponent of larger result
n	number of BKM iterations (at most 32)

Returns: exp(abs) and exp(-abs) as Q1.31 with same exponent

Definition at line 1650 of file half.hpp.

◆ hypot_post()

unsigned int half_float::detail::hypot_post	(	uint32	r,
		int	exp
	)

Hypotenuse square root and postprocessing.

Template Parameters

R	rounding mode to use

Parameters

r	mantissa as Q2.30
exp	biased exponent

Returns: square root converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 1745 of file half.hpp.

◆ int2half()

unsigned int half_float::detail::int2half ( T value )

Convert integer to half-precision floating-point.

Template Parameters

R	rounding mode to use
T	type to convert (builtin integer type)

Parameters

value integral value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_INEXACT	if value had to be rounded

Definition at line 1076 of file half.hpp.

◆ integral()

unsigned int half_float::detail::integral ( unsigned int value )

Round half-precision number to nearest integer value.

Template Parameters

R	rounding mode to use
E	`true` for round to even, `false` for round away from zero
I	`true` to raise INEXACT exception (if inexact), `false` to never raise it

Parameters

value half-precision value to round

Returns: half-precision bits for nearest integral value

Exceptions

FE_INVALID	for signaling NaN
FE_INEXACT	if value had to be rounded and I is `true`

Definition at line 849 of file half.hpp.

◆ invalid()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::invalid ( )

inline

Raise domain error and return NaN.

Returns: quiet NaN

Exceptions

FE_INVALID

Definition at line 743 of file half.hpp.

◆ log2()

uint32 half_float::detail::log2	(	uint32	m,
		unsigned int	n = `32`
	)

inline

Fixed point binary logarithm. This uses the BKM algorithm in L-mode.

Parameters

m	mantissa in [1,2) as Q1.30
n	number of iterations (at most 32)

Returns: log2(m) as Q0.31

Definition at line 1531 of file half.hpp.

◆ log2_post()

unsigned int half_float::detail::log2_post	(	uint32	m,
		int	ilog,
		int	exp,
		unsigned int	sign = `0`
	)

Postprocessing for binary logarithm.

Template Parameters

R	rounding mode to use
L	logarithm for base transformation as Q1.31

Parameters

m	fractional part of logarithm as Q0.31
ilog	signed integer part of logarithm
exp	biased exponent of result
sign	sign bit of result

Returns: value base-transformed and converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1720 of file half.hpp.

◆ mod()

unsigned int half_float::detail::mod	(	unsigned int	x,
		unsigned int	y,
		int *	quo = `NULL`
	)

Half precision positive modulus.

Template Parameters

Q	`true` to compute full quotient, `false` else
R	`true` to compute signed remainder, `false` for positive remainder

Parameters

x	first operand as positive finite half-precision value
y	second operand as positive finite half-precision value
quo	address to store quotient at, `nullptr` if Q `false`

Returns: modulus of x / y

Definition at line 1409 of file half.hpp.

◆ mulhi()

uint32 half_float::detail::mulhi	(	uint32	x,
		uint32	y
	)

Upper part of 64-bit multiplication.

Template Parameters

R	rounding mode to use

Parameters

x	first factor
y	second factor

Returns: upper 32 bit of x * y

Definition at line 1355 of file half.hpp.

◆ multiply64()

uint32 half_float::detail::multiply64	(	uint32	x,
		uint32	y
	)

inline

64-bit multiplication.

Parameters

x	first factor
y	second factor

Returns: upper 32 bit of x * y rounded to nearest

Definition at line 1366 of file half.hpp.

◆ overflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::overflow ( unsigned int sign = 0 )

Half-precision overflow.

Template Parameters

R	rounding mode to use

Parameters

sign	half-precision value with sign bit only

Returns: rounded overflowing half-precision value

Exceptions

FE_OVERFLOW

Definition at line 784 of file half.hpp.

◆ pole()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::pole ( unsigned int sign = 0 )

inline

Raise pole error and return infinity.

Parameters

sign	half-precision value with sign bit only

Returns: half-precision infinity with sign of sign

Exceptions

FE_DIVBYZERO

Definition at line 755 of file half.hpp.

◆ raise()

void half_float::detail::raise	(	int	HALF_UNUSED_NOERR(flags),
		bool	HALF_UNUSED_NOERR(cond) = `true`
	)

inline

Raise floating-point exception.

Parameters

flags	exceptions to raise
cond	condition to raise exceptions for

Definition at line 625 of file half.hpp.

◆ rounded()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::rounded	(	unsigned int	value,
		int	g,
		int	s
	)

Round half-precision number.

Template Parameters

R	rounding mode to use
I	`true` to always raise INEXACT exception, `false` to raise only for rounded results

Parameters

value	finite half-precision number to round
g	guard bit (most significant discarded bit)
s	sticky bit (or of all but the most significant discarded bits)

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded or I is `true`

Definition at line 820 of file half.hpp.

◆ select()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::select	(	unsigned int	x,
		unsigned int	HALF_UNUSED_NOERR(y)
	)

inline

Select value or signaling NaN.

Parameters

x	preferred half-precision value
y	ignored half-precision value except for signaling NaN

Returns: y if signaling NaN, x otherwise

Exceptions

FE_INVALID if y is signaling NaN

Definition at line 731 of file half.hpp.

◆ sign_mask()

uint32 half_float::detail::sign_mask ( uint32 arg )

inline

Platform-independent sign mask.

Parameters

arg	integer value in two's complement

Return values

-1	if arg is negative
0	if arg is zero or positive

Definition at line 591 of file half.hpp.

◆ signal() [1/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal ( unsigned int nan )

inline

Signal and silence signaling NaN.

Parameters

nan	half-precision NaN value

Returns: quiet NaN

Exceptions

FE_INVALID if nan is signaling NaN

Definition at line 691 of file half.hpp.

◆ signal() [2/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal	(	unsigned int	x,
		unsigned int	y
	)

inline

Signal and silence signaling NaNs.

Parameters

x	first half-precision value to check
y	second half-precision value to check

Returns: quiet NaN

Exceptions

FE_INVALID if x or y is signaling NaN

Definition at line 704 of file half.hpp.

◆ signal() [3/3]

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::signal	(	unsigned int	x,
		unsigned int	y,
		unsigned int	z
	)

inline

Signal and silence signaling NaNs.

Parameters

x	first half-precision value to check
y	second half-precision value to check
z	third half-precision value to check

Returns: quiet NaN

Exceptions

FE_INVALID if x, y or z is signaling NaN

Definition at line 718 of file half.hpp.

◆ sincos()

std::pair< uint32, uint32 > half_float::detail::sincos	(	uint32	mz,
		unsigned int	n = `31`
	)

inline

Fixed point sine and cosine. This uses the CORDIC algorithm in rotation mode.

Parameters

mz	angle in [-pi/2,pi/2] as Q1.30
n	number of iterations (at most 31)

Returns: sine and cosine of mz as Q1.30

Definition at line 1558 of file half.hpp.

◆ sqrt()

uint32 half_float::detail::sqrt	(	uint32 &	r,
		int &	exp
	)

Fixed point square root.

Template Parameters

F	number of fractional bits

Parameters

r	radicand in Q1.F fixed point format
exp	exponent

Returns: square root as Q1.F/2

Definition at line 1480 of file half.hpp.

◆ tangent_post()

unsigned int half_float::detail::tangent_post	(	uint32	my,
		uint32	mx,
		int	exp,
		unsigned int	sign = `0`
	)

Division and postprocessing for tangents.

Template Parameters

R	rounding mode to use

Parameters

my	dividend as Q1.31
mx	divisor as Q1.31
exp	biased exponent of result
sign	sign bit of result

Returns: quotient converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1767 of file half.hpp.

◆ underflow()

HALF_CONSTEXPR_NOERR unsigned int half_float::detail::underflow ( unsigned int sign = 0 )

Half-precision underflow.

Template Parameters

R	rounding mode to use

Parameters

sign	half-precision value with sign bit only

Returns: rounded underflowing half-precision value

Exceptions

FE_UNDERFLOW

Definition at line 800 of file half.hpp.

Variable Documentation

◆ binary

HALF_CONSTEXPR_CONST binary_t binary = binary_t()

Tag for binary construction.

Definition at line 536 of file half.hpp.

Classes

Namespaces

Macros

Typedefs

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ FE_ALL_EXCEPT

◆ FE_DIVBYZERO

◆ FE_INEXACT

◆ FE_INVALID

◆ FE_OVERFLOW

◆ FE_UNDERFLOW

◆ FP_FAST_FMAH

◆ FP_ILOGB0

◆ FP_ILOGBNAN

◆ FP_INFINITE

◆ FP_NAN

◆ FP_NORMAL

◆ FP_SUBNORMAL

◆ FP_ZERO

◆ HALF_CONSTEXPR

◆ HALF_CONSTEXPR_CONST

◆ HALF_CONSTEXPR_NOERR

◆ HALF_ENABLE_F16C_INTRINSICS

◆ HALF_ERRHANDLING

◆ HALF_ERRHANDLING_OVERFLOW_TO_INEXACT

◆ HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT

◆ HALF_GCC_VERSION

◆ HALF_ICC_VERSION

◆ HALF_NOEXCEPT

◆ HALF_NOTHROW

◆ HALF_ROUND_STYLE

◆ HALF_THREAD_LOCAL

◆ HALF_UNUSED_NOERR

◆ HLF_ROUNDS

◆ HUGE_VALH

Typedef Documentation

◆ false_type

◆ int32

◆ true_type

◆ uint16

◆ uint32

Function Documentation

◆ angle_arg()

◆ area()

◆ arithmetic_shift()

◆ atan2()

◆ atan2_args()

◆ builtin_isinf()

◆ builtin_isnan()

◆ builtin_signbit()

◆ check_underflow()

◆ compsignal()

◆ divide64()

◆ erf()

◆ errflags()

◆ exp2()

◆ exp2_post()

◆ fixed2half()

◆ float2half()

◆ float2half_impl() [1/3]

◆ float2half_impl() [2/3]

◆ float2half_impl() [3/3]

◆ gamma()

◆ half2float()

◆ half2float_impl() [1/3]

◆ half2float_impl() [2/3]

◆ half2float_impl() [3/3]

◆ half2int()

◆ hyperbolic_args()

◆ hypot_post()

◆ int2half()

◆ integral()

◆ invalid()

◆ log2()

◆ log2_post()

◆ mod()

◆ mulhi()