OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
opennn::Embedding Class Reference (final)

Token-id-to-vector lookup layer used in language models. More...

#include <embedding_layer.h>

Inheritance diagram for opennn::Embedding:
[legend]

Public Member Functions

 Embedding (const Shape &input_shape={0, 0}, Index embedding_dimension=0, const string &label="embedding_layer")
 Constructs an Embedding layer.
 
Shape get_input_shape () const override
 Returns the per-sample input shape (sequence_length,).
 
Shape get_output_shape () const override
 Returns the per-sample output shape.
 
Index get_vocabulary_size () const
 Number of distinct tokens in the vocabulary.
 
Index get_sequence_length () const
 Sequence length expected at input.
 
Index get_embedding_dimension () const
 Width of each embedding vector.
 
vector< Operator * > get_operators () override
 Returns the active operators in pipeline order.
 
vector< pair< Shape, Type > > get_forward_specs (Index batch_size) const override
 Specifications of the forward intermediate buffers.
 
void set (Index vocabulary_size=0, Index sequence_length=0, Index embedding_dimension=0, const string &label="embedding_layer")
 Re-initializes the layer.
 
void set_scale_embedding (bool enabled)
 Enables Transformer-style sqrt(d_model) scaling on the embedding table output.
 
void set_add_positional_encoding (bool enabled)
 Enables addition of a sinusoidal positional encoding after lookup.
 
void set_dropout_rate (float rate)
 Sets the dropout rate applied at the layer output.
 
void back_propagate (ForwardPropagation &, BackPropagation &, size_t) const noexcept override
 Backward pass: scatters output gradients into the embedding table rows referenced by the input ids.
 
void read_JSON_body (const Json *) override
 Reads the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).
 
void write_JSON_body (JsonWriter &) const override
 Writes the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).
 
- Public Member Functions inherited from opennn::Layer
virtual ~Layer ()=default
 Virtual destructor; subclasses are owned via unique_ptr<Layer>.
 
const string & get_label () const
 Returns the user-assigned label of this layer.
 
const string & get_name () const
 Returns the canonical type name of this layer.
 
LayerType get_type () const
 Returns the LayerType enumerator for this layer.
 
virtual void set_input_shape (const Shape &)
 Sets the per-sample input shape of this layer.
 
virtual void set_output_shape (const Shape &)
 Sets the per-sample output shape of this layer.
 
void set_label (string new_label)
 Sets the human-readable label of this layer.
 
Index get_parameters_number () const
 Total number of trainable parameters in this layer.
 
virtual vector< pair< Shape, Type > > get_parameter_specs () const
 Specifications of the trainable parameter tensors owned by this layer.
 
virtual vector< pair< Shape, Type > > get_state_specs () const
 Specifications of the persistent state tensors of this layer.
 
virtual vector< pair< Shape, Type > > get_backward_specs (Index batch_size) const
 Specifications of the backward intermediate buffers for one batch.
 
vector< Shape > get_parameter_shapes () const
 Shape-only view of get_parameter_specs().
 
vector< Shape > get_state_shapes () const
 Shape-only view of get_state_specs().
 
vector< Shape > get_forward_shapes (Index b) const
 Shape-only view of get_forward_specs() for batch size b.
 
vector< Shape > get_backward_shapes (Index b) const
 Shape-only view of get_backward_specs() for batch size b.
 
vector< Type > get_parameter_dtypes () const
 Dtype-only view of get_parameter_specs().
 
vector< Type > get_forward_dtypes (Index b) const
 Dtype-only view of get_forward_specs() for batch size b.
 
vector< Type > get_backward_dtypes (Index b) const
 Dtype-only view of get_backward_specs() for batch size b.
 
virtual Activation::Function get_output_activation () const
 Activation function fused at the end of this layer, if any.
 
Index get_inputs_number () const
 Total number of scalar inputs per sample (product of input dims).
 
Index get_outputs_number () const
 Total number of scalar outputs per sample (product of output dims).
 
virtual void forward_propagate (ForwardPropagation &fp, size_t layer, bool is_training) noexcept
 Forward pass: reads inputs from fp and writes outputs into fp.
 
virtual void from_JSON (const JsonDocument &document)
 Loads the layer configuration (hyperparameters) from JSON.
 
virtual void load_state_from_JSON (const JsonDocument &document)
 Loads parameter and state tensors from a JSON document.
 
virtual void to_JSON (JsonWriter &writer) const
 Writes the layer configuration to JSON.
 
virtual void print () const
 Prints a human-readable summary of the layer to stdout.
 
bool get_is_trainable () const
 Whether this layer has trainable parameters.
 
Type get_compute_dtype () const
 Numerical type used for forward/backward computation.
 
void set_compute_dtype (Type new_compute_dtype)
 Sets the compute dtype and triggers on_compute_dtype_changed().
 
virtual void on_compute_dtype_changed ()
 Hook invoked after set_compute_dtype() mutates the dtype.
 
virtual float * link_parameters (float *pointer)
 Wires this layer's parameter TensorViews onto an external buffer.
 
virtual float * link_states (float *pointer)
 Wires this layer's state TensorViews onto an external buffer.
 
vector< TensorView > & get_parameter_views ()
 Mutable access to this layer's parameter TensorViews.
 
const vector< TensorView > & get_parameter_views () const
 Read-only access to this layer's parameter TensorViews.
 
vector< TensorView > & get_state_views ()
 Mutable access to this layer's state TensorViews.
 
const vector< TensorView > & get_state_views () const
 Read-only access to this layer's state TensorViews.
 
void redistribute_parameters_to_operators ()
 Forwards the current parameter views down to each composing Operator.
 
void redistribute_parameter_gradients_to_operators (vector< TensorView > &gradient_views)
 Forwards externally provided gradient views down to each Operator.
 
void redistribute_states_to_operators ()
 Forwards the current state views down to each composing Operator.
 

Additional Inherited Members

- Protected Member Functions inherited from opennn::Layer
 Layer ()=default
 Default constructor; only invoked by subclasses.
 
float * link_views (float *pointer, const vector< Shape > &shapes, vector< TensorView > &views, const char *tag) const
 Builds views over a contiguous float buffer using shapes.
 
void distribute_to_operators (vector< TensorView > &views, void(Operator::*link)(const vector< TensorView > &), vector< pair< Shape, Type > >(Operator::*specs)() const)
 Generic helper used by the redistribute_*_to_operators() routines.
 
- Protected Attributes inherited from opennn::Layer
string label = "my_layer"
 User-visible label for this layer instance (default "my_layer").
 
string name = "layer"
 Canonical type name set by the subclass (e.g. "dense").
 
LayerType layer_type = LayerType::Dense
 Layer type tag set by the subclass.
 
bool is_trainable = true
 True if the layer has parameters that participate in training.
 
bool is_first_layer = false
 True if this layer is the network's input layer.
 
Type compute_dtype = Type::FP32
 Numerical type used for forward and backward computation.
 
vector< TensorView > parameters
 Parameter TensorViews bound to the network's parameter arena.
 
vector< TensorView > states
 State TensorViews bound to the network's state arena.
 
vector< unique_ptr< Layer > > layers
 Sub-layers, when this layer is itself a composite.
 

Detailed Description

Token-id-to-vector lookup layer used in language models.

Inputs are token id sequences (rank-1, length sequence_length). Each id is replaced by its row in an embedding table of shape (vocabulary_size, embedding_dimension), producing a rank-2 output of shape (sequence_length, embedding_dimension).

Optional flags toggle Transformer-style sqrt(d_model) scaling and positional encoding addition; an optional dropout is applied at the end.

Constructor & Destructor Documentation

◆ Embedding()

opennn::Embedding::Embedding ( const Shape & input_shape = {0, 0},
Index embedding_dimension = 0,
const string & label = "embedding_layer" )

Constructs an Embedding layer.

Parameters
input_shape: Per-sample input shape; carries (sequence_length, vocabulary_size) when both are known up front.
embedding_dimension: Width of each embedding vector.
label: Human-readable label assigned to this layer.

Member Function Documentation

◆ back_propagate()

void opennn::Embedding::back_propagate ( ForwardPropagation & ,
BackPropagation & ,
size_t  ) const
override virtual noexcept

Backward pass: scatters output gradients into the embedding table rows referenced by the input ids.

Receives the forward intermediates, the BackPropagation buffer and this layer's index inside the network.

Reimplemented from opennn::Layer.

◆ get_embedding_dimension()

Index opennn::Embedding::get_embedding_dimension ( ) const
inline

Width of each embedding vector.

◆ get_forward_specs()

vector< pair< Shape, Type > > opennn::Embedding::get_forward_specs ( Index batch_size) const
override virtual

Specifications of the forward intermediate buffers.

Parameters
batch_size: Batch size used for sizing.
Returns
Specs for the Input and Output slots.

Reimplemented from opennn::Layer.

◆ get_input_shape()

Shape opennn::Embedding::get_input_shape ( ) const
inline override virtual

Returns the per-sample input shape (sequence_length,).

Implements opennn::Layer.

◆ get_operators()

vector< Operator * > opennn::Embedding::get_operators ( )
override virtual

Returns the active operators in pipeline order.

Returns
EmbeddingLookup followed by Dropout if its rate is non-zero.

Reimplemented from opennn::Layer.

◆ get_output_shape()

Shape opennn::Embedding::get_output_shape ( ) const
override virtual

Returns the per-sample output shape.

Returns
(sequence_length, embedding_dimension).

Implements opennn::Layer.

◆ get_sequence_length()

Index opennn::Embedding::get_sequence_length ( ) const
inline

Sequence length expected at input.

◆ get_vocabulary_size()

Index opennn::Embedding::get_vocabulary_size ( ) const
inline

Number of distinct tokens in the vocabulary.

◆ read_JSON_body()

void opennn::Embedding::read_JSON_body ( const Json * )
override virtual

Reads the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).

Reimplemented from opennn::Layer.

◆ set()

void opennn::Embedding::set ( Index vocabulary_size = 0,
Index sequence_length = 0,
Index embedding_dimension = 0,
const string & label = "embedding_layer" )

Re-initializes the layer.

Parameters
vocabulary_size: Number of distinct tokens.
sequence_length: Sequence length expected at input.
embedding_dimension: Width of each embedding vector.
label: Human-readable label.

◆ set_add_positional_encoding()

void opennn::Embedding::set_add_positional_encoding ( bool enabled)
inline

Enables addition of a sinusoidal positional encoding after lookup.

Parameters
enabled: True to add positional encodings; false to skip.

◆ set_dropout_rate()

void opennn::Embedding::set_dropout_rate ( float rate)
inline

Sets the dropout rate applied at the layer output.

Parameters
rate: Probability of dropping each unit (0 disables dropout).

◆ set_scale_embedding()

void opennn::Embedding::set_scale_embedding ( bool enabled)
inline

Enables Transformer-style sqrt(d_model) scaling on the embedding table output.

Parameters
enabled: True to scale; false to leave embeddings as-is.

◆ write_JSON_body()

void opennn::Embedding::write_JSON_body ( JsonWriter & ) const
override virtual

Writes the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).

Reimplemented from opennn::Layer.