46 static const vector<pair<LayerType, string>> entries = {
85 const char* layer,
const char* what)
87 if (shape.
empty())
return;
88 for (
int r : allowed)
if (
int(shape.
rank) == r)
return;
91 auto it = allowed.begin();
92 while (it != allowed.end())
94 if (!allowed_str.empty())
95 allowed_str += (it + 1 == allowed.end()) ?
" or " :
", ";
96 allowed_str += to_string(*it);
100 throw runtime_error(format(
"{} layer supports {} rank {} (got {}).",
101 layer, what, allowed_str, shape.
rank));
172 op->forward_propagate(fp, layer, is_training);
182 op->back_propagate(fp, bp, i);
204 const vector<string>& )
const {
return {}; }
266 vector<TensorView>& views,
float* pointer,
267 vector<TensorSpec> (
Operator::*specs_fn)()
const,
268 void (
Operator::*link_fn)(span<const TensorView>));
Element-wise addition layer that sums two input tensors of identical shape (residual connections).
Definition addition_layer.h:19
Output bounding layer that clips outputs to configured lower and upper limits.
Definition bounding_layer.h:19
Fused convolution + ReLU layer; runs as a single GPU op (cuDNN) and is CUDA-Graph friendly.
Definition convolutional_relu_layer.h:24
2D convolutional layer with kernel, stride, padding, activation, and optional batch normalization.
Definition convolutional_layer.h:19
Fused dense + ReLU layer; combines linear projection and ReLU activation in a single op for performance.
Definition dense_relu_layer.h:19
Fully-connected layer with configurable activation, optional batch normalization and dropout.
Definition dense_layer.h:19
Token-id to dense vector embedding layer with optional scaling and positional encoding.
Definition embedding_layer.h:19
Flatten layer that reshapes a multi-dimensional input into a single 1D feature vector.
Definition flatten_layer.h:19
LayerType layer_type
Definition layer.h:252
Index get_inputs_number() const
Definition layer.h:161
Type get_compute_dtype() const
Definition layer.h:211
Index get_parameters_number() const
Returns the total number of trainable parameters owned by this layer.
const string & get_label() const
Definition layer.h:112
Type compute_dtype
Definition layer.h:258
vector< Operator * > operators
Definition layer.h:263
LayerType get_type() const
Definition layer.h:116
virtual void read_JSON_body(const Json *)
Subclass hook reading the body section of the layer's JSON node.
Definition layer.h:189
vector< TensorView > parameters
Definition layer.h:260
virtual Shape get_input_shape() const
Returns the input shape stored by the layer.
Definition layer.h:153
Layer(LayerType t, bool trainable=true)
Definition layer.h:244
virtual void on_compute_dtype_changed()
Subclass hook invoked when the compute dtype changes; default is no-op.
Definition layer.h:221
vector< TensorView > states
Definition layer.h:261
virtual string write_expression(const vector< string > &, const vector< string > &) const
Returns a human-readable mathematical expression for this layer (empty by default).
Definition layer.h:203
Backward
Definition layer.h:248
@ OutputDelta
Definition layer.h:248
@ InputDelta
Definition layer.h:248
virtual vector< TensorSpec > get_state_specs() const
Returns the tensor specs of persistent state (e.g. running mean/variance).
virtual float * link_states(float *pointer)
Binds the persistent-state region of the shared buffer to operator views.
bool get_is_trainable() const
Definition layer.h:209
const vector< Operator * > & get_operators() const
Definition layer.h:129
Forward
Definition layer.h:247
@ Output
Definition layer.h:247
@ Input
Definition layer.h:247
virtual vector< TensorSpec > get_parameter_specs() const
Returns the tensor specs of trainable parameters; subclasses override.
float * link_views_to_operators(vector< TensorView > &views, float *pointer, vector< TensorSpec >(Operator::*specs_fn)() const, void(Operator::*link_fn)(span< const TensorView >))
virtual void load_state_from_JSON(const JsonDocument &document)
Restores persistent state (e.g. running statistics) from a JSON document.
virtual void from_JSON(const JsonDocument &document)
Restores layer configuration and parameters from a JSON document.
string label
Definition layer.h:250
void set_compute_dtype(Type new_compute_dtype)
Sets the compute dtype and notifies subclasses via on_compute_dtype_changed().
Definition layer.h:214
virtual void back_propagate(ForwardPropagation &fp, BackPropagation &bp, size_t i) const noexcept
Runs the backward pass by chaining the layer's operators in reverse order.
Definition layer.h:179
bool is_trainable
Definition layer.h:254
virtual vector< TensorSpec > get_backward_specs(Index batch_size) const
Returns the tensor specs of the backward workspace; empty for non-trainable layers.
Definition layer.h:146
virtual void forward_propagate(ForwardPropagation &fp, size_t layer, bool is_training) noexcept
Runs the forward pass by chaining the layer's operators in order.
Definition layer.h:169
void set_label(string new_label)
Definition layer.h:124
virtual void write_JSON_body(JsonWriter &) const
Subclass hook writing the body section of the layer's JSON node.
Definition layer.h:198
const vector< TensorView > & get_parameter_views() const
Definition layer.h:235
virtual vector< TensorSpec > get_forward_specs(Index batch_size) const
Returns the tensor specs of the forward workspace; defaults to a single output tensor.
Definition layer.h:139
Index get_outputs_number() const
Definition layer.h:163
virtual ActivationOp::Function get_output_activation() const
Returns the layer's output activation (Identity for most layers; overridden by Dense/Bounding).
Definition layer.h:159
const string & get_name() const
Definition layer.h:114
vector< TensorView > & get_parameter_views()
Definition layer.h:234
virtual void print() const
Prints a human-readable summary of the layer to standard output.
Definition layer.h:207
float * link_gradients(float *pointer, vector< TensorView > &gradient_views)
Binds the gradient slice of the shared buffer to operator gradient views.
void redistribute_parameters_to_operators()
Re-binds operator parameter views after the parameter buffer has been resized or moved.
virtual void to_JSON(JsonWriter &writer) const
Serializes layer configuration and parameters to a JSON writer.
virtual void set_output_shape(const Shape &)
Sets the output shape; subclasses override when the output is user-configurable.
virtual Shape get_output_shape() const =0
Returns the output shape; subclasses must implement this to expose their geometry.
virtual void set_input_shape(const Shape &)
Sets the input shape; subclasses override to derive dependent dimensions.
Shape input_shape
Definition layer.h:256
Multi-head scaled dot-product attention layer used in transformer architectures.
Definition multihead_attention_layer.h:20
Layer normalization over the embedding axis of a 3D (batch, sequence, embedding) tensor.
Definition normalization_layer_3d.h:19
Sequence pooling layer reducing the time axis of a (sequence, features) input.
Definition pooling_layer_3d.h:20
2D spatial pooling layer supporting max and average reduction.
Definition pooling_layer.h:40
Basic recurrent (RNN) layer that unrolls over time steps with a shared activation.
Definition recurrent_layer.h:18
Input scaling layer that normalizes features using per-variable descriptive statistics.
Definition scaling_layer.h:21
Output unscaling layer that reverts normalization back to the original feature ranges.
Definition unscaling_layer.h:22
Definition adaptive_moment_estimation.h:14
LayerType
Identifier of every concrete layer subclass shipped with OpenNN.
Definition layer.h:25
@ MultiHeadAttention
Definition layer.h:34
@ ConvolutionalRelu
Definition layer.h:29
@ Recurrent
Definition layer.h:38
@ Pooling3d
Definition layer.h:37
@ Convolutional
Definition layer.h:28
@ Unscaling
Definition layer.h:40
@ Pooling
Definition layer.h:36
@ Flatten
Definition layer.h:33
@ Bounding
Definition layer.h:27
@ Addition
Definition layer.h:26
@ Dense
Definition layer.h:30
@ Embedding
Definition layer.h:32
@ Scaling
Definition layer.h:39
@ Normalization3d
Definition layer.h:35
@ DenseRelu
Definition layer.h:31
const string & layer_type_to_string(LayerType type)
Returns the string name associated with the given LayerType.
Definition layer.h:68
Type
Numeric precision used for training or inference tensors.
Definition configuration.h:20
@ FP32
Definition configuration.h:20
LayerType string_to_layer_type(const string &name)
Returns the LayerType corresponding to the given string name.
Definition layer.h:74
const EnumMap< LayerType > & layer_type_map()
Returns the bidirectional mapping between LayerType values and their string names.
Definition layer.h:44
void check_rank(const Shape &shape, initializer_list< int > allowed, const char *layer, const char *what)
Throws if shape rank is not one of allowed.
Definition layer.h:84
Function
Supported activation functions.
Definition operators.h:171
@ Identity
Definition operators.h:171
Workspace holding parameter gradients and per-layer deltas during a backward pass.
Definition back_propagation.h:21
const string & to_string(Enum value) const
Definition enum_map.h:23
Workspace holding the activations of every layer during a forward pass.
Definition forward_propagation.h:20
Base class for compute building blocks composed by layers (matmul, activation, dropout, etc.).
Definition operators.h:28
Fixed-capacity small-vector describing tensor dimensions (rank up to MaxRank).
Definition tensor_utilities.h:42
bool empty() const noexcept
Definition tensor_utilities.h:76
size_t rank
Definition tensor_utilities.h:46
Shape & append(const Shape &other)
Appends another shape's dimensions to this one, stopping at MaxRank.
Definition tensor_utilities.h:106
Index size() const noexcept
Returns the number of elements (product of all dimensions).
Definition tensor_utilities.h:82