31 static constexpr Index
bytes = Index(
sizeof(
float));
32 static constexpr const char*
name =
"FP32";
42 static constexpr const char*
name =
"BF16";
51 static constexpr Index
bytes = Index(1);
52 static constexpr const char*
name =
"INT8";
58 template<
Type... Supported,
typename F>
63 throw_if(!matched,
"visit_type: unsupported Type value");
70 template<
Type... Supported,
typename F>
73 visit_type<Supported...>(t_in, [&](
auto in_info)
75 visit_type<Supported...>(t_out, [&](
auto out_info)
140 static Configuration configuration;
141 return configuration;
160 return cached_resolved;
162 return resolve_slow();
175 [[nodiscard]]
const Resolved& resolve_slow()
const;
181 mutable Resolved cached_resolved;
182 mutable atomic<bool> cache_valid{
false};
Global singleton holding the OpenNN device and precision configuration.
Definition configuration.h:126
const Resolved & resolve() const
Returns the cached resolved configuration, computing it on first access.
Definition configuration.h:157
bool is_cpu() const
Definition configuration.h:166
Device get_device() const
Definition configuration.h:152
Type get_training_type() const
Definition configuration.h:153
bool is_bf16_inference() const
Definition configuration.h:169
bool is_bf16_training() const
Definition configuration.h:168
bool is_gpu() const
Definition configuration.h:165
Type get_inference_type() const
Definition configuration.h:154
static Configuration & instance()
Returns the process-wide Configuration singleton.
Definition configuration.h:138
void set(Device new_device=Device::Auto, Type new_training_type=Type::Auto, Type new_inference_type=Type::Auto)
Updates device and precision settings; subsequent resolve() calls reflect the change.
Definition adaptive_moment_estimation.h:14
bool is_cpu()
Returns true when the resolved configuration runs on CPU.
Definition configuration.h:188
Device current_device()
Returns the active runtime device (CUDA if available, otherwise CPU).
Definition configuration.h:194
Index type_bytes(Type type) noexcept
Returns the byte size of one element of the given OpenNN Type.
Definition configuration.h:111
bool is_bf16_inference()
Returns true when inference is configured to use BF16 precision.
Definition configuration.h:192
Device
Execution device selection for OpenNN runtime (auto-detected, CPU or CUDA GPU).
Definition configuration.h:17
@ Auto
Definition configuration.h:17
@ CPU
Definition configuration.h:17
@ CUDA
Definition configuration.h:17
void throw_if(bool condition, const string &message, const source_location &loc=source_location::current())
Definition pch.h:147
bool is_gpu()
Returns true when the resolved configuration runs on a CUDA GPU.
Definition configuration.h:186
cudnnDataType_t to_cudnn(Type type) noexcept
Returns the cuDNN data type matching the given OpenNN Type (Auto resolves to FP32).
Definition configuration.h:83
Type
Numeric precision used for training or inference tensors.
Definition configuration.h:20
@ Auto
Definition configuration.h:20
@ FP32
Definition configuration.h:20
@ INT8
Definition configuration.h:20
@ BF16
Definition configuration.h:20
void visit_type(Type t, F &&f)
Dispatches f with the TypeInfo of the runtime Type t (must be in Supported).
Definition configuration.h:59
cudaDataType_t to_cuda(Type type) noexcept
Returns the CUDA data type matching the given OpenNN Type (Auto resolves to FP32).
Definition configuration.h:97
bool is_bf16_training()
Returns true when training is configured to use BF16 precision.
Definition configuration.h:190
void visit_type_pair(Type t_in, Type t_out, F &&f)
Dispatches f with the TypeInfo pair for an input and output runtime Type.
Definition configuration.h:71
__nv_bfloat16 bfloat16
Definition pch.h:145
cudaDataType_t
Definition pch.h:93
@ CUDA_R_32F
Definition pch.h:93
@ CUDA_R_8I
Definition pch.h:93
@ CUDA_R_16BF
Definition pch.h:93
cudnnDataType_t
Definition pch.h:99
@ CUDNN_DATA_BFLOAT16
Definition pch.h:99
@ CUDNN_DATA_INT8
Definition pch.h:99
@ CUDNN_DATA_FLOAT
Definition pch.h:99
Resolved configuration after Auto values are mapped to concrete device and types.
Definition configuration.h:131
Type training_type
Definition configuration.h:133
Type inference_type
Definition configuration.h:134
Device device
Definition configuration.h:132
bfloat16 type
Definition configuration.h:38
static constexpr Index bytes
Definition configuration.h:41
static constexpr cudaDataType_t cuda
Definition configuration.h:40
static constexpr cudnnDataType_t cudnn
Definition configuration.h:39
static constexpr const char * name
Definition configuration.h:42
float type
Definition configuration.h:28
static constexpr Index bytes
Definition configuration.h:31
static constexpr cudnnDataType_t cudnn
Definition configuration.h:29
static constexpr const char * name
Definition configuration.h:32
static constexpr cudaDataType_t cuda
Definition configuration.h:30
static constexpr cudaDataType_t cuda
Definition configuration.h:50
int8_t type
Definition configuration.h:48
static constexpr cudnnDataType_t cudnn
Definition configuration.h:49
static constexpr const char * name
Definition configuration.h:52
static constexpr Index bytes
Definition configuration.h:51
Compile-time traits mapping an opennn::Type to its underlying numeric type and library identifiers.
Definition configuration.h:23