OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
neural_network.h
Go to the documentation of this file.
1// OpenNN: Open Neural Networks Library
2// www.opennn.net
3//
4// N E U R A L   N E T W O R K   C L A S S   H E A D E R
5//
6// Artificial Intelligence Techniques SL
7// artelnics@artelnics.com
8
9#pragma once
10
11#include "layer.h"
12#include "tensor_utilities.h"
13#include "variable.h"
14
15namespace opennn
16{
17
20{
21
22public:
23
26
27 virtual ~NeuralNetwork() = default;
28
31 NeuralNetwork(const filesystem::path&);
32
36 void add_layer(unique_ptr<Layer>,
37 const vector<Index>& = {});
38
// Returns a const reference to the stored `config` member (a Configuration::Resolved).
39 [[nodiscard]] const Configuration::Resolved& get_config() const { return config; }
// True when config.device equals Device::CUDA.
40 [[nodiscard]] bool is_gpu() const { return config.device == Device::CUDA; }
// True when config.device equals Device::CPU.
41 [[nodiscard]] bool is_cpu() const { return config.device == Device::CPU; }
42
// Returns config.training_type (a Type value) by copy.
43 [[nodiscard]] Type get_training_type() const { return config.training_type; }
// Returns config.inference_type (a Type value) by copy.
44 [[nodiscard]] Type get_inference_type() const { return config.inference_type; }
45
// Returns one vector of TensorSpec per layer, in `layers` order, each obtained
// from that layer's Layer::get_parameter_specs(). Built fresh on every call.
47 [[nodiscard]] vector<vector<TensorSpec>> get_parameter_specs() const { return collect_layer_specs([](const Layer& L) { return L.get_parameter_specs(); }); }
48
// Returns one vector of TensorSpec per layer, in `layers` order, each obtained
// from that layer's Layer::get_state_specs(). Built fresh on every call.
50 [[nodiscard]] vector<vector<TensorSpec>> get_state_specs() const { return collect_layer_specs([](const Layer& L) { return L.get_state_specs(); }); }
51
// Collects, for every layer in `layers` order, the result of
// Layer::get_forward_specs(b); `b` is forwarded unchanged to each layer.
// When is_gpu() is false, the dtype of every returned spec is overwritten
// with Type::FP32 before returning.
// NOTE(review): presumably because the non-GPU path only runs in FP32 — confirm.
54 [[nodiscard]] vector<vector<TensorSpec>> get_forward_specs(Index b) const
55 {
56 auto specs = collect_layer_specs([b](const Layer& L) { return L.get_forward_specs(b); });
57 if (!is_gpu()) force_specs_to_fp32(specs);
58 return specs;
59 }
60
// Collects, for every layer in `layers` order, the result of
// Layer::get_backward_specs(b); `b` is forwarded unchanged to each layer.
// When is_gpu() is false, the dtype of every returned spec is overwritten
// with Type::FP32 before returning (same rule as get_forward_specs).
63 [[nodiscard]] vector<vector<TensorSpec>> get_backward_specs(Index b) const
64 {
65 auto specs = collect_layer_specs([b](const Layer& L) { return L.get_backward_specs(b); });
66 if (!is_gpu()) force_specs_to_fp32(specs);
67 return specs;
68 }
69
// Passes the per-layer state specs to get_aligned_size() and returns its result.
// NOTE(review): the get_aligned_size overload that accepts a spec collection is
// not visible in this listing; confirm the unit and alignment of the returned size there.
71 [[nodiscard]] Index get_states_size() const { return get_aligned_size(get_state_specs()); }
72
74 void compile();
75
77 [[nodiscard]] bool has(const string&) const;
78
80 [[nodiscard]] bool has(LayerType) const;
81
// True when the network holds no layers (`layers` is empty).
82 [[nodiscard]] bool is_empty() const { return layers.empty(); }
83
// Mutable view of the `parameters` buffer as a float pointer.
// NOTE(review): Buffer is described as living on CPU or CUDA memory; confirm which
// address space the pointer refers to before dereferencing it on the host.
84 [[nodiscard]] float* get_parameters_data() { return parameters.as<float>(); }
// Read-only counterpart of the accessor above.
85 [[nodiscard]] const float* get_parameters_data() const { return parameters.as<float>(); }
// Returns parameters.size_in_floats(), i.e. the buffer size expressed in floats.
86 [[nodiscard]] Index get_parameters_size() const { return parameters.size_in_floats(); }
87
// Returns a const reference to the stored input variables (no copy is made).
88 [[nodiscard]] const vector<Variable>& get_input_variables() const { return input_variables; }
89
91 [[nodiscard]] vector<string> get_input_feature_names() const;
92
// Returns a const reference to the stored output variables (no copy is made).
93 [[nodiscard]] const vector<Variable>& get_output_variables() const { return output_variables; }
94
96 [[nodiscard]] vector<string> get_output_feature_names() const;
97
// Returns a const reference to the layer container; ownership stays with the network.
98 [[nodiscard]] const vector<unique_ptr<Layer>>& get_layers() const { return layers; }
// Returns a const reference to the owning pointer of layer `i`.
// No bounds check is performed: `i` must be a valid index into `layers`.
99 [[nodiscard]] const unique_ptr<Layer>& get_layer(const Index i) const { return layers[i]; }
100
103 [[nodiscard]] const unique_ptr<Layer>& get_layer(const string&) const;
104
106 [[nodiscard]] Index get_layer_index(const string&) const;
107
// Returns a const reference to `source_layers`: one vector of source-layer
// indices per entry.
108 [[nodiscard]] const vector<vector<Index>>& get_source_layers() const { return source_layers; }
109
111 [[nodiscard]] vector<vector<Index>> get_consumer_layers() const;
112
114 [[nodiscard]] Layer* get_first(const string&);
115
117 [[nodiscard]] Layer* get_first(LayerType);
118
120 [[nodiscard]] const Layer* get_first(const string&) const;
121
123 [[nodiscard]] const Layer* get_first(LayerType) const;
124
// Replaces the whole `source_layers` table with a copy of `new_source_layers`.
// The new table is stored as given; no validation is performed here.
127 void set_source_layers(const vector<vector<Index>>& new_source_layers) { source_layers = new_source_layers; }
128
// Replaces the source list of a single layer with a copy of `new_sources`.
// No bounds check and no resize: `layer_index` must already be a valid
// index into `source_layers`.
130 void set_source_layers(const Index layer_index, const vector<Index>& new_sources) { source_layers[layer_index] = new_sources; }
131
135 void set_source_layers(const string&, const vector<string>&);
136
138 void set_source_layers(const string&, initializer_list<string>);
139
143 void set_source_layers(const string&, const string&);
144
// Overwrites `input_variables` with a copy of the argument.
145 void set_input_variables(const vector<Variable>& new_input_variables) { input_variables = new_input_variables; }
// Overwrites `output_variables` with a copy of the argument.
146 void set_output_variables(const vector<Variable>& new_output_variables) { output_variables = new_output_variables; }
147
149 void set_input_names(const vector<string>&);
150
152 void set_output_names(const vector<string>&);
153
155 void set_input_shape(const Shape&);
156
158 void clear();
159
// Number of layers in the network, obtained with ssize() (signed size) and returned as Index.
160 [[nodiscard]] Index get_layers_number() const { return ssize(layers); }
161
163 [[nodiscard]] Index get_layers_number(const string&) const;
164
166 [[nodiscard]] Index get_layers_number(LayerType) const;
167
169 [[nodiscard]] Index get_first_trainable_layer_index() const;
170
172 [[nodiscard]] Index get_last_trainable_layer_index() const;
173
175 [[nodiscard]] Index get_inputs_number() const;
176
178 [[nodiscard]] Index get_outputs_number() const;
179
181 [[nodiscard]] Shape get_input_shape() const;
182
184 [[nodiscard]] Shape get_output_shape() const;
185
188
190 [[nodiscard]] Index get_parameters_number() const;
191
193 void set_parameters(const VectorR& new_parameters);
194
197
200
203
206
210 [[nodiscard]] MatrixR calculate_outputs(const vector<TensorView>&);
211
213 [[nodiscard]] MatrixR calculate_outputs(const MatrixR&);
214
216 [[nodiscard]] MatrixR calculate_outputs(const Tensor3&);
217
219 [[nodiscard]] MatrixR calculate_outputs(const Tensor4&);
220
228 [[nodiscard]] MatrixR calculate_directional_inputs(const Index, const VectorR&, float, float, Index = 101) const;
229
233 [[nodiscard]] Tensor3 calculate_outputs(const Tensor3&, const Tensor3&);
234
236 [[nodiscard]] Index calculate_image_output(const filesystem::path&);
237
239 [[nodiscard]] MatrixR calculate_text_outputs(const Tensor<string, 1>&);
240
243
245 void to_JSON(JsonWriter&) const;
246
248 void save(const filesystem::path&) const;
249
251 void save_parameters(const filesystem::path&) const;
252
254 void save_parameters_binary(const filesystem::path&) const;
255
257 void load(const filesystem::path&);
258
260 void load_parameters_binary(const filesystem::path&);
261
263 [[nodiscard]] vector<string> get_names_string() const;
264
266 void save_outputs(MatrixR&, const filesystem::path&);
267
269 void save_outputs(Tensor3&, const filesystem::path&);
270
275 void forward_propagate(const vector<TensorView>&,
277 bool = false) const;
278
285 void forward_propagate(const vector<TensorView>&,
287 bool is_training,
288 Index first_layer_index,
289 Index last_layer_index) const;
290
295 void forward_propagate(const vector<TensorView>&,
296 const VectorR&,
298
299#ifdef OPENNN_HAS_CUDA
300
301public:
302
304 void cast_parameters_to_bf16();
305
// Accessor for the `parameters_bf16` buffer, viewed as bfloat16*.
// Declared only when OPENNN_HAS_CUDA is defined (it sits inside that #ifdef).
// Non-const only: the returned pointer allows writing into the buffer.
306 // Returns nullptr when no BF16 mirror is allocated (FP32-only mode), so
307 // optimizer kernels can pass it straight through and skip the mirror write.
308 [[nodiscard]] bfloat16* get_parameters_bf16_data()
309 {
// An empty buffer maps to nullptr instead of whatever as<>() would return for it.
310 return parameters_bf16.empty() ? nullptr : parameters_bf16.as<bfloat16>();
311 }
312
314 void copy_parameters_device();
315
317 void copy_parameters_host();
318
320 void copy_states_device();
321
323 void copy_states_host();
324
325private:
326
327 [[nodiscard]] MatrixR calculate_outputs_device(const vector<TensorView>&, ForwardPropagation&);
328
329#endif
330
331public:
332
334 [[nodiscard]] vector<string> get_layer_labels() const;
335
336private:
337
338 void validate_type(LayerType) const;
339
// Overwrites, in place, the dtype of every TensorSpec in `specs` with Type::FP32.
// `specs` is indexed per layer, then per tensor; no other spec field is touched.
// Static: does not read any network state.
340 static void force_specs_to_fp32(vector<vector<TensorSpec>>& specs)
341 {
342 for (auto& layer_specs : specs)
343 for (auto& spec : layer_specs)
344 spec.dtype = Type::FP32;
345 }
346
// Applies `fn` to every layer and gathers the results.
//   fn      callable invoked as fn(const Layer&); its result is stored as that
//           layer's vector<TensorSpec>.
//   returns one entry per layer, in the same order as `layers`.
// Each layer pointer is dereferenced unconditionally, so `layers` must not
// contain null entries.
347 template<typename Fn>
348 [[nodiscard]] vector<vector<TensorSpec>> collect_layer_specs(Fn fn) const
349 {
// Pre-size the output so transform can write through out.begin() directly.
350 vector<vector<TensorSpec>> out(layers.size());
351 ranges::transform(layers, out.begin(),
352 [&](const unique_ptr<Layer>& l) { return fn(*l); });
353 return out;
354 }
355
356protected:
357
358 vector<Variable> input_variables;
359 vector<Variable> output_variables;
360
361 vector<unique_ptr<Layer>> layers;
362
363 vector<vector<Index>> source_layers;
364
367
369
371
372 // Cached by get_first/last_trainable_layer_index after first computation.
373 // Invalidated when the layer list changes (add_layer / clear).
// Declared `mutable` so the const index getters can store their result here.
// NOTE(review): -1 is presumably the "not yet computed" sentinel — confirm in the
// implementation file. The writes are made from const methods; verify whether
// callers may invoke those getters concurrently.
374 mutable Index first_trainable_cache_ = -1;
375 mutable Index last_trainable_cache_ = -1;
376};
377
378}
379
380// OpenNN: Open Neural Networks Library.
381// Copyright(C) 2005-2026 Artificial Intelligence Techniques, SL.
382// Licensed under the GNU Lesser General Public License v2.1 or later.
Definition json.h:72
Definition json.h:85
Abstract base class for all OpenNN layers; orchestrates operators and shape propagation.
Definition layer.h:106
virtual vector< TensorSpec > get_state_specs() const
Returns the tensor specs of persistent state (e.g. running mean/variance).
virtual vector< TensorSpec > get_parameter_specs() const
Returns the tensor specs of trainable parameters; subclasses override.
virtual vector< TensorSpec > get_backward_specs(Index batch_size) const
Returns the tensor specs of the backward workspace; empty for non-trainable layers.
Definition layer.h:146
virtual vector< TensorSpec > get_forward_specs(Index batch_size) const
Returns the tensor specs of the forward workspace; defaults to a single output tensor.
Definition layer.h:139
void save(const filesystem::path &) const
Saves the full network (architecture + parameters) to a JSON file.
Buffer parameters
Definition neural_network.h:365
const unique_ptr< Layer > & get_layer(const string &) const
Returns the layer with the given label.
Layer * get_first(LayerType)
Returns the first layer of the given type, or nullptr if not found.
vector< vector< TensorSpec > > get_state_specs() const
Returns the tensor specs of persistent layer state (e.g. running statistics).
Definition neural_network.h:50
vector< string > get_layer_labels() const
Returns the labels of all layers in order.
MatrixR calculate_directional_inputs(const Index, const VectorR &, float, float, Index=101) const
Generates samples by sweeping one input dimension across a range while keeping the others fixed.
const vector< vector< Index > > & get_source_layers() const
Definition neural_network.h:108
void set_input_variables(const vector< Variable > &new_input_variables)
Definition neural_network.h:145
Index get_last_trainable_layer_index() const
Returns the index of the last trainable layer (cached).
void from_JSON(const JsonDocument &)
Restores the network architecture and parameters from a JSON document.
bool has(const string &) const
Returns whether the network contains a layer with the given label.
void set_output_names(const vector< string > &)
Sets the names of every output feature.
vector< Variable > output_variables
Definition neural_network.h:359
void set_parameters_random()
Initializes every parameter with random values.
void set_source_layers(const string &, const vector< string > &)
Sets the source layers of a layer using labels for identification.
NeuralNetwork()
Constructs an empty neural network.
Index last_trainable_cache_
Definition neural_network.h:375
NeuralNetwork(const filesystem::path &)
Constructs a neural network and loads its definition from a JSON file.
ActivationOp::Function get_output_activation() const
Returns the activation function of the output layer.
void forward_propagate(const vector< TensorView > &, ForwardPropagation &, bool=false) const
Runs a forward pass over all layers.
Index get_inputs_number() const
Returns the number of input features expected by the first layer.
void forward_propagate(const vector< TensorView > &, ForwardPropagation &, bool is_training, Index first_layer_index, Index last_layer_index) const
Runs a forward pass over a contiguous sub-range of layers.
Index get_parameters_number() const
Returns the total number of trainable parameters across all layers.
MatrixR calculate_outputs(const vector< TensorView > &)
Computes outputs for the given input tensor views.
MatrixR calculate_text_outputs(const Tensor< string, 1 > &)
Tokenizes the given strings and returns the network's outputs.
void forward_propagate(const vector< TensorView > &, const VectorR &, ForwardPropagation &)
Runs a forward pass after temporarily overwriting the parameter buffer.
bool is_cpu() const
Definition neural_network.h:41
vector< string > get_output_feature_names() const
Returns the flat list of output feature names (expanding categorical variables).
void load_parameters_binary(const filesystem::path &)
Loads parameter values from a binary file produced by save_parameters_binary().
Index get_outputs_number() const
Returns the number of output features produced by the last layer.
Shape get_output_shape() const
Returns the shape of the output of the last layer.
MatrixR calculate_outputs(const MatrixR &)
Computes outputs for a 2D input matrix.
Index get_layers_number() const
Definition neural_network.h:160
void save_outputs(Tensor3 &, const filesystem::path &)
Writes the 3D output tensor to a CSV file.
vector< vector< TensorSpec > > get_forward_specs(Index b) const
Returns the tensor specs of the forward-propagation workspace for each layer.
Definition neural_network.h:54
vector< string > get_names_string() const
Returns the labels of all layers as a vector of strings.
Index calculate_image_output(const filesystem::path &)
Reads an image file and returns the predicted class index.
Index get_layer_index(const string &) const
Returns the index of the layer with the given label, or -1 if not found.
Buffer parameters_bf16
Definition neural_network.h:366
Shape get_input_shape() const
Returns the shape of the input of the first layer.
Index get_states_size() const
Returns the total byte size required to hold all persistent layer states.
Definition neural_network.h:71
void set_parameters(const VectorR &new_parameters)
Copies the contents of new_parameters into the network's parameter buffer.
void compile()
Allocates buffers, resolves devices, and wires layer/operator views; call once after all layers are added.
void set_input_names(const vector< string > &)
Sets the names of every input feature.
void save_parameters_binary(const filesystem::path &) const
Saves only the parameter values to a binary file.
vector< Variable > input_variables
Definition neural_network.h:358
void clear()
Removes all layers and resets the network to an empty state.
vector< vector< TensorSpec > > get_backward_specs(Index b) const
Returns the tensor specs of the back-propagation workspace for each layer.
Definition neural_network.h:63
Index first_trainable_cache_
Definition neural_network.h:374
void save_outputs(MatrixR &, const filesystem::path &)
Writes the output matrix to a CSV file.
MatrixR calculate_outputs(const Tensor4 &)
Computes outputs for a 4D input tensor.
bool is_gpu() const
Definition neural_network.h:40
void set_parameters_glorot()
Initializes every parameter using Glorot (Xavier) initialization.
vector< unique_ptr< Layer > > layers
Definition neural_network.h:361
Configuration::Resolved config
Definition neural_network.h:370
Index get_parameters_size() const
Definition neural_network.h:86
const float * get_parameters_data() const
Definition neural_network.h:85
Layer * get_first(const string &)
Returns the first layer matching the given label, or nullptr if not found.
const Layer * get_first(const string &) const
Returns the first layer matching the given label, or nullptr if not found.
void link_parameters()
Wires the contiguous parameter buffer to per-layer / per-operator views.
MatrixR calculate_outputs(const Tensor3 &)
Computes outputs for a 3D input tensor.
vector< vector< Index > > source_layers
Definition neural_network.h:363
void add_layer(unique_ptr< Layer >, const vector< Index > &={})
Appends a layer to the network.
Index get_layers_number(const string &) const
Returns the number of layers whose label contains the given substring.
void set_input_shape(const Shape &)
Sets the shape of the input of the first layer and propagates it through the graph.
const Layer * get_first(LayerType) const
Returns the first layer of the given type, or nullptr if not found.
const unique_ptr< Layer > & get_layer(const Index i) const
Definition neural_network.h:99
void link_states()
Wires the contiguous state buffer to per-layer / per-operator views.
void set_output_variables(const vector< Variable > &new_output_variables)
Definition neural_network.h:146
Index get_first_trainable_layer_index() const
Returns the index of the first trainable layer (cached).
vector< string > get_input_feature_names() const
Returns the flat list of input feature names (expanding categorical variables).
void set_source_layers(const string &, const string &)
Convenience overload for a single source layer.
virtual ~NeuralNetwork()=default
vector< vector< Index > > get_consumer_layers() const
Returns the inverse adjacency: for each layer, the indices of layers that consume its output.
void set_source_layers(const vector< vector< Index > > &new_source_layers)
Replaces the layer connectivity graph.
Definition neural_network.h:127
const vector< unique_ptr< Layer > > & get_layers() const
Definition neural_network.h:98
Index get_layers_number(LayerType) const
Returns the number of layers of the given type.
bool has(LayerType) const
Returns whether the network contains at least one layer of the given type.
void save_parameters(const filesystem::path &) const
Saves only the parameter values to a JSON file.
Buffer states
Definition neural_network.h:368
void set_source_layers(const string &, initializer_list< string >)
Sets the source layers of a layer using labels for identification.
void load(const filesystem::path &)
Loads the full network (architecture + parameters) from a JSON file.
Tensor3 calculate_outputs(const Tensor3 &, const Tensor3 &)
Computes outputs for an encoder/decoder model.
const vector< Variable > & get_output_variables() const
Definition neural_network.h:93
Type get_inference_type() const
Definition neural_network.h:44
void to_JSON(JsonWriter &) const
Serializes the network architecture and parameters to a JSON writer.
Type get_training_type() const
Definition neural_network.h:43
vector< vector< TensorSpec > > get_parameter_specs() const
Returns the tensor specs of trainable parameters for every layer.
Definition neural_network.h:47
bool is_empty() const
Definition neural_network.h:82
float * get_parameters_data()
Definition neural_network.h:84
void set_source_layers(const Index layer_index, const vector< Index > &new_sources)
Replaces the source layers for one specific layer.
Definition neural_network.h:130
const vector< Variable > & get_input_variables() const
Definition neural_network.h:88
const Configuration::Resolved & get_config() const
Definition neural_network.h:39
Definition adaptive_moment_estimation.h:14
@ CPU
Definition configuration.h:17
@ CUDA
Definition configuration.h:17
LayerType
Identifier of every concrete layer subclass shipped with OpenNN.
Definition layer.h:25
Index get_aligned_size(Index size)
Definition tensor_utilities.h:28
Type
Numeric precision used for training or inference tensors.
Definition configuration.h:20
@ FP32
Definition configuration.h:20
__nv_bfloat16 bfloat16
Definition pch.h:145
Tensor< float, 3, Layout|AlignedMax > Tensor3
Definition pch.h:190
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:181
Matrix< float, Dynamic, Dynamic, Layout > MatrixR
Definition pch.h:177
Tensor< float, 4, Layout|AlignedMax > Tensor4
Definition pch.h:191
Function
Supported activation functions.
Definition operators.h:171
Owning raw byte buffer that lives on CPU or CUDA memory, with aligned (re)allocation.
Definition tensor_utilities.h:166
Resolved configuration after Auto values are mapped to concrete device and types.
Definition configuration.h:131
Workspace holding the activations of every layer during a forward pass.
Definition forward_propagation.h:20
Fixed-capacity small-vector describing tensor dimensions (rank up to MaxRank).
Definition tensor_utilities.h:42