OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
layer.h
Go to the documentation of this file.
1// OpenNN: Open Neural Networks Library
2// www.opennn.net
3//
4// L A Y E R C L A S S H E A D E R
5//
6// Artificial Intelligence Techniques SL
7// artelnics@artelnics.com
8
9#pragma once
10
11#include "tensor_utilities.h"
12#include "math_utilities.h"
13#include "random_utilities.h"
14#include "string_utilities.h"
15#include "forward_propagation.h"
16#include "back_propagation.h"
17
18namespace opennn
19{
20
21struct Operator;
22
42
44[[nodiscard]] inline const EnumMap<LayerType>& layer_type_map()
45{
46 static const vector<pair<LayerType, string>> entries = {
47 {LayerType::Addition, "Addition"},
48 {LayerType::Bounding, "Bounding"},
49 {LayerType::Convolutional, "Convolutional"},
50 {LayerType::ConvolutionalRelu, "ConvolutionalRelu"},
51 {LayerType::Dense, "Dense"},
52 {LayerType::DenseRelu, "DenseRelu"},
53 {LayerType::Embedding, "Embedding"},
54 {LayerType::Flatten, "Flatten"},
55 {LayerType::MultiHeadAttention, "MultiHeadAttention"},
56 {LayerType::Normalization3d, "Normalization3d"},
57 {LayerType::Pooling, "Pooling"},
58 {LayerType::Pooling3d, "Pooling3d"},
59 {LayerType::Recurrent, "Recurrent"},
60 {LayerType::Scaling, "Scaling"},
61 {LayerType::Unscaling, "Unscaling"}
62 };
63 static const EnumMap<LayerType> map{entries};
64 return map;
65}
66
68[[nodiscard]] inline const string& layer_type_to_string(LayerType type)
69{
70 return layer_type_map().to_string(type);
71}
72
74[[nodiscard]] inline LayerType string_to_layer_type(const string& name)
75{
76 return layer_type_map().from_string(name);
77}
78
/// Throws if the rank of @p shape is not one of @p allowed.
///
/// An empty shape is accepted without any check.
///
/// @param shape   Shape whose rank is validated.
/// @param allowed Ranks that are accepted.
/// @param layer   Layer name used as the first word of the error message.
/// @param what    Description of what is being checked (inserted before "rank").
/// @throws runtime_error listing the allowed ranks and the rank actually received.
inline void check_rank(const Shape& shape, initializer_list<int> allowed,
                       const char* layer, const char* what)
{
    if (shape.empty())
        return;

    const int actual_rank = int(shape.rank);

    for (const int candidate : allowed)
    {
        if (candidate == actual_rank)
            return;
    }

    // Render the allowed ranks as "a, b or c": commas between items, " or "
    // before the last one.
    string allowed_str;
    const size_t total = allowed.size();
    size_t position = 0;

    for (const int candidate : allowed)
    {
        if (position > 0)
        {
            const bool is_last = (position + 1 == total);
            allowed_str += is_last ? " or " : ", ";
        }

        allowed_str += to_string(candidate);
        ++position;
    }

    throw runtime_error(format("{} layer supports {} rank {} (got {}).",
                               layer, what, allowed_str, shape.rank));
}
103
105class Layer
106{
107
108public:
109
110 virtual ~Layer() = default;
111
112 [[nodiscard]] const string& get_label() const { return label; }
113
114 [[nodiscard]] const string& get_name() const { return layer_type_to_string(layer_type); }
115
116 [[nodiscard]] LayerType get_type() const { return layer_type; }
117
119 virtual void set_input_shape(const Shape&);
120
122 virtual void set_output_shape(const Shape&);
123
124 void set_label(string new_label) { label = move(new_label); }
125
127 [[nodiscard]] Index get_parameters_number() const;
128
129 [[nodiscard]] const vector<Operator*>& get_operators() const { return operators; }
130
132 [[nodiscard]] virtual vector<TensorSpec> get_parameter_specs() const;
133
135 [[nodiscard]] virtual vector<TensorSpec> get_state_specs() const;
136
139 [[nodiscard]] virtual vector<TensorSpec> get_forward_specs(Index batch_size) const
140 {
141 return {{Shape{batch_size}.append(get_output_shape()), compute_dtype}};
142 }
143
146 [[nodiscard]] virtual vector<TensorSpec> get_backward_specs(Index batch_size) const
147 {
148 if (!is_trainable) return {};
149 return {{Shape{batch_size}.append(get_input_shape()), compute_dtype}};
150 }
151
153 [[nodiscard]] virtual Shape get_input_shape() const { return input_shape; }
154
156 [[nodiscard]] virtual Shape get_output_shape() const = 0;
157
160
161 [[nodiscard]] Index get_inputs_number() const { return get_input_shape().size(); }
162
163 [[nodiscard]] Index get_outputs_number() const { return get_output_shape().size(); }
164
169 virtual void forward_propagate(ForwardPropagation& fp, size_t layer, bool is_training) noexcept
170 {
171 for (Operator* op : get_operators())
172 op->forward_propagate(fp, layer, is_training);
173 }
174
179 virtual void back_propagate(ForwardPropagation& fp, BackPropagation& bp, size_t i) const noexcept
180 {
181 for (Operator* op : views::reverse(get_operators()))
182 op->back_propagate(fp, bp, i);
183 }
184
186 virtual void from_JSON(const JsonDocument& document);
187
189 virtual void read_JSON_body(const Json*) {}
190
192 virtual void load_state_from_JSON(const JsonDocument& document);
193
195 virtual void to_JSON(JsonWriter& writer) const;
196
198 virtual void write_JSON_body(JsonWriter&) const {}
199
203 [[nodiscard]] virtual string write_expression(const vector<string>& /*input_names*/,
204 const vector<string>& /*output_names*/) const { return {}; }
205
207 virtual void print() const {}
208
209 [[nodiscard]] bool get_is_trainable() const { return is_trainable; }
210
211 [[nodiscard]] Type get_compute_dtype() const { return compute_dtype; }
212
214 void set_compute_dtype(Type new_compute_dtype)
215 {
216 compute_dtype = new_compute_dtype;
218 }
219
221 virtual void on_compute_dtype_changed() {}
222
226 virtual float* link_states(float* pointer);
227
232 float* link_gradients(float* pointer, vector<TensorView>& gradient_views);
233
234 vector<TensorView>& get_parameter_views() { return parameters; }
235 const vector<TensorView>& get_parameter_views() const { return parameters; }
236
239
240protected:
241
242 Layer() = default;
243
244 Layer(LayerType t, bool trainable = true)
245 : layer_type(t), is_trainable(trainable) {}
246
249
250 string label = "my_layer";
251
253
254 bool is_trainable = true;
255
257
259
260 vector<TensorView> parameters;
261 vector<TensorView> states;
262
263 vector<Operator*> operators;
264
266 vector<TensorView>& views, float* pointer,
267 vector<TensorSpec> (Operator::*specs_fn)() const,
268 void (Operator::*link_fn)(span<const TensorView>));
269
270};
271
272}
273
274// OpenNN: Open Neural Networks Library.
275// Copyright(C) 2005-2026 Artificial Intelligence Techniques, SL.
276// Licensed under the GNU Lesser General Public License v2.1 or later.
Element-wise addition layer that sums two input tensors of identical shape (residual connections).
Definition addition_layer.h:19
Output bounding layer that clips outputs to configured lower and upper limits.
Definition bounding_layer.h:19
Fused convolution + ReLU layer; runs as a single GPU op (cudnn) and is CUDA-Graph friendly.
Definition convolutional_relu_layer.h:24
2D convolutional layer with kernel, stride, padding, activation, and optional batch normalization.
Definition convolutional_layer.h:19
Fused dense + ReLU layer; combines linear projection and ReLU activation in a single op for performance.
Definition dense_relu_layer.h:19
Fully-connected layer with configurable activation, optional batch normalization and dropout.
Definition dense_layer.h:19
Token-id to dense vector embedding layer with optional scaling and positional encoding.
Definition embedding_layer.h:19
Flatten layer that reshapes a multi-dimensional input into a single 1D feature vector.
Definition flatten_layer.h:19
Definition json.h:72
Definition json.h:85
Definition json.h:23
LayerType layer_type
Definition layer.h:252
Index get_inputs_number() const
Definition layer.h:161
Type get_compute_dtype() const
Definition layer.h:211
Index get_parameters_number() const
Returns the total number of trainable parameters owned by this layer.
const string & get_label() const
Definition layer.h:112
Type compute_dtype
Definition layer.h:258
vector< Operator * > operators
Definition layer.h:263
LayerType get_type() const
Definition layer.h:116
virtual void read_JSON_body(const Json *)
Subclass hook reading the body section of the layer's JSON node.
Definition layer.h:189
Layer()=default
vector< TensorView > parameters
Definition layer.h:260
virtual Shape get_input_shape() const
Returns the input shape stored by the layer.
Definition layer.h:153
Layer(LayerType t, bool trainable=true)
Definition layer.h:244
virtual void on_compute_dtype_changed()
Subclass hook invoked when the compute dtype changes; default is no-op.
Definition layer.h:221
vector< TensorView > states
Definition layer.h:261
virtual string write_expression(const vector< string > &, const vector< string > &) const
Returns a human-readable mathematical expression for this layer (empty by default).
Definition layer.h:203
Backward
Definition layer.h:248
@ OutputDelta
Definition layer.h:248
@ InputDelta
Definition layer.h:248
virtual vector< TensorSpec > get_state_specs() const
Returns the tensor specs of persistent state (e.g. running mean/variance).
virtual float * link_states(float *pointer)
Binds the persistent-state region of the shared buffer to operator views.
virtual ~Layer()=default
bool get_is_trainable() const
Definition layer.h:209
const vector< Operator * > & get_operators() const
Definition layer.h:129
Forward
Definition layer.h:247
@ Output
Definition layer.h:247
@ Input
Definition layer.h:247
virtual vector< TensorSpec > get_parameter_specs() const
Returns the tensor specs of trainable parameters; subclasses override.
float * link_views_to_operators(vector< TensorView > &views, float *pointer, vector< TensorSpec >(Operator::*specs_fn)() const, void(Operator::*link_fn)(span< const TensorView >))
virtual void load_state_from_JSON(const JsonDocument &document)
Restores persistent state (e.g. running statistics) from a JSON document.
virtual void from_JSON(const JsonDocument &document)
Restores layer configuration and parameters from a JSON document.
string label
Definition layer.h:250
void set_compute_dtype(Type new_compute_dtype)
Sets the compute dtype and notifies subclasses via on_compute_dtype_changed().
Definition layer.h:214
virtual void back_propagate(ForwardPropagation &fp, BackPropagation &bp, size_t i) const noexcept
Runs the backward pass by chaining the layer's operators in reverse order.
Definition layer.h:179
bool is_trainable
Definition layer.h:254
virtual vector< TensorSpec > get_backward_specs(Index batch_size) const
Returns the tensor specs of the backward workspace; empty for non-trainable layers.
Definition layer.h:146
virtual void forward_propagate(ForwardPropagation &fp, size_t layer, bool is_training) noexcept
Runs the forward pass by chaining the layer's operators in order.
Definition layer.h:169
void set_label(string new_label)
Definition layer.h:124
virtual void write_JSON_body(JsonWriter &) const
Subclass hook writing the body section of the layer's JSON node.
Definition layer.h:198
const vector< TensorView > & get_parameter_views() const
Definition layer.h:235
virtual vector< TensorSpec > get_forward_specs(Index batch_size) const
Returns the tensor specs of the forward workspace; defaults to a single output tensor.
Definition layer.h:139
Index get_outputs_number() const
Definition layer.h:163
virtual ActivationOp::Function get_output_activation() const
Returns the layer's output activation (Identity for most layers; overridden by Dense/Bounding).
Definition layer.h:159
const string & get_name() const
Definition layer.h:114
vector< TensorView > & get_parameter_views()
Definition layer.h:234
virtual void print() const
Prints a human-readable summary of the layer to standard output.
Definition layer.h:207
float * link_gradients(float *pointer, vector< TensorView > &gradient_views)
Binds the gradient slice of the shared buffer to operator gradient views.
void redistribute_parameters_to_operators()
Re-binds operator parameter views after the parameter buffer has been resized or moved.
virtual void to_JSON(JsonWriter &writer) const
Serializes layer configuration and parameters to a JSON writer.
virtual void set_output_shape(const Shape &)
Sets the output shape; subclasses override when the output is user-configurable.
virtual Shape get_output_shape() const =0
Returns the output shape; subclasses must implement this to expose their geometry.
virtual void set_input_shape(const Shape &)
Sets the input shape; subclasses override to derive dependent dimensions.
Shape input_shape
Definition layer.h:256
Multi-head scaled dot-product attention layer used in transformer architectures.
Definition multihead_attention_layer.h:20
Layer normalization over the embedding axis of a 3D (batch, sequence, embedding) tensor.
Definition normalization_layer_3d.h:19
Sequence pooling layer reducing the time axis of a (sequence, features) input.
Definition pooling_layer_3d.h:20
2D spatial pooling layer supporting max and average reduction.
Definition pooling_layer.h:40
Basic recurrent (RNN) layer that unrolls over time steps with a shared activation.
Definition recurrent_layer.h:18
Input scaling layer that normalizes features using per-variable descriptive statistics.
Definition scaling_layer.h:21
Output unscaling layer that reverts normalization back to the original feature ranges.
Definition unscaling_layer.h:22
Definition adaptive_moment_estimation.h:14
LayerType
Identifier of every concrete layer subclass shipped with OpenNN.
Definition layer.h:25
@ MultiHeadAttention
Definition layer.h:34
@ ConvolutionalRelu
Definition layer.h:29
@ Recurrent
Definition layer.h:38
@ Pooling3d
Definition layer.h:37
@ Convolutional
Definition layer.h:28
@ Unscaling
Definition layer.h:40
@ Pooling
Definition layer.h:36
@ Flatten
Definition layer.h:33
@ Bounding
Definition layer.h:27
@ Addition
Definition layer.h:26
@ Dense
Definition layer.h:30
@ Embedding
Definition layer.h:32
@ Scaling
Definition layer.h:39
@ Normalization3d
Definition layer.h:35
@ DenseRelu
Definition layer.h:31
const string & layer_type_to_string(LayerType type)
Returns the string name associated with the given LayerType.
Definition layer.h:68
Type
Numeric precision used for training or inference tensors.
Definition configuration.h:20
@ FP32
Definition configuration.h:20
LayerType string_to_layer_type(const string &name)
Returns the LayerType corresponding to the given string name.
Definition layer.h:74
const EnumMap< LayerType > & layer_type_map()
Returns the bidirectional mapping between LayerType values and their string names.
Definition layer.h:44
void check_rank(const Shape &shape, initializer_list< int > allowed, const char *layer, const char *what)
Throws if shape rank is not one of allowed.
Definition layer.h:84
Function
Supported activation functions.
Definition operators.h:171
@ Identity
Definition operators.h:171
Workspace holding parameter gradients and per-layer deltas during a backward pass.
Definition back_propagation.h:21
Definition enum_map.h:18
const string & to_string(Enum value) const
Definition enum_map.h:23
Workspace holding the activations of every layer during a forward pass.
Definition forward_propagation.h:20
Base class for compute building blocks composed by layers (matmul, activation, dropout, ...).
Definition operators.h:28
Fixed-capacity small-vector describing tensor dimensions (rank up to MaxRank).
Definition tensor_utilities.h:42
bool empty() const noexcept
Definition tensor_utilities.h:76
size_t rank
Definition tensor_utilities.h:46
Shape & append(const Shape &other)
Appends another shape's dimensions to this one, stopping at MaxRank.
Definition tensor_utilities.h:106
Index size() const noexcept
Returns the number of elements (product of all dimensions).
Definition tensor_utilities.h:82