Token-id-to-vector lookup layer used in language models. More...

#include <embedding_layer.h>

Inheritance diagram for opennn::Embedding:

Public Member Functions
	Embedding (const Shape &input_shape={0, 0}, Index embedding_dimension=0, const string &label="embedding_layer")
	Constructs an Embedding layer.

Shape	get_input_shape () const override
	Returns the per-sample input shape (sequence_length,).

Shape	get_output_shape () const override
	Returns the per-sample output shape.

Index	get_vocabulary_size () const
	Number of distinct tokens in the vocabulary.

Index	get_sequence_length () const
	Sequence length expected at input.

Index	get_embedding_dimension () const
	Width of each embedding vector.

vector< Operator * >	get_operators () override
	Returns the active operators in pipeline order.

vector< pair< Shape, Type > >	get_forward_specs (Index batch_size) const override
	Specifications of the forward intermediate buffers.

void	set (Index vocabulary_size=0, Index sequence_length=0, Index embedding_dimension=0, const string &label="embedding_layer")
	Re-initializes the layer.

void	set_scale_embedding (bool enabled)
	Enables or disables Transformer-style sqrt(d_model) scaling of the embedding table output.

void	set_add_positional_encoding (bool enabled)
	Enables or disables the addition of a sinusoidal positional encoding after lookup.

void	set_dropout_rate (float rate)
	Sets the dropout rate applied at the layer output.

void	back_propagate (ForwardPropagation &, BackPropagation &, size_t) const noexcept override
	Backward pass: scatters output gradients into the embedding table rows referenced by the input ids.

void	read_JSON_body (const Json *) override
	Reads the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).

void	write_JSON_body (JsonWriter &) const override
	Writes the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).

Public Member Functions inherited from opennn::Layer
virtual	~Layer ()=default
	Virtual destructor; subclasses are owned via unique_ptr<Layer>.

const string &	get_label () const
	Returns the user-assigned label of this layer.

const string &	get_name () const
	Returns the canonical type name of this layer.

LayerType	get_type () const
	Returns the LayerType enumerator for this layer.

virtual void	set_input_shape (const Shape &)
	Sets the per-sample input shape of this layer.

virtual void	set_output_shape (const Shape &)
	Sets the per-sample output shape of this layer.

void	set_label (string new_label)
	Sets the human-readable label of this layer.

Index	get_parameters_number () const
	Total number of trainable parameters in this layer.

virtual vector< pair< Shape, Type > >	get_parameter_specs () const
	Specifications of the trainable parameter tensors owned by this layer.

virtual vector< pair< Shape, Type > >	get_state_specs () const
	Specifications of the persistent state tensors of this layer.

virtual vector< pair< Shape, Type > >	get_backward_specs (Index batch_size) const
	Specifications of the backward intermediate buffers for one batch.

vector< Shape >	get_parameter_shapes () const
	Shape-only view of get_parameter_specs().

vector< Shape >	get_state_shapes () const
	Shape-only view of get_state_specs().

vector< Shape >	get_forward_shapes (Index b) const
	Shape-only view of get_forward_specs() for batch size `b`.

vector< Shape >	get_backward_shapes (Index b) const
	Shape-only view of get_backward_specs() for batch size `b`.

vector< Type >	get_parameter_dtypes () const
	Dtype-only view of get_parameter_specs().

vector< Type >	get_forward_dtypes (Index b) const
	Dtype-only view of get_forward_specs() for batch size `b`.

vector< Type >	get_backward_dtypes (Index b) const
	Dtype-only view of get_backward_specs() for batch size `b`.

virtual Activation::Function	get_output_activation () const
	Activation function fused at the end of this layer, if any.

Index	get_inputs_number () const
	Total number of scalar inputs per sample (product of input dims).

Index	get_outputs_number () const
	Total number of scalar outputs per sample (product of output dims).

virtual void	forward_propagate (ForwardPropagation &fp, size_t layer, bool is_training) noexcept
	Forward pass: reads inputs from `fp` and writes outputs into `fp`.

virtual void	from_JSON (const JsonDocument &document)
	Loads the layer configuration (hyperparameters) from JSON.

virtual void	load_state_from_JSON (const JsonDocument &document)
	Loads parameter and state tensors from a JSON document.

virtual void	to_JSON (JsonWriter &writer) const
	Writes the layer configuration to JSON.

virtual void	print () const
	Prints a human-readable summary of the layer to stdout.

bool	get_is_trainable () const
	Whether this layer has trainable parameters.

Type	get_compute_dtype () const
	Numerical type used for forward/backward computation.

void	set_compute_dtype (Type new_compute_dtype)
	Sets the compute dtype and triggers on_compute_dtype_changed().

virtual void	on_compute_dtype_changed ()
	Hook invoked after set_compute_dtype() mutates the dtype.

virtual float *	link_parameters (float *pointer)
	Wires this layer's parameter TensorViews onto an external buffer.

virtual float *	link_states (float *pointer)
	Wires this layer's state TensorViews onto an external buffer.

vector< TensorView > &	get_parameter_views ()
	Mutable access to this layer's parameter TensorViews.

const vector< TensorView > &	get_parameter_views () const
	Read-only access to this layer's parameter TensorViews.

vector< TensorView > &	get_state_views ()
	Mutable access to this layer's state TensorViews.

const vector< TensorView > &	get_state_views () const
	Read-only access to this layer's state TensorViews.

void	redistribute_parameters_to_operators ()
	Forwards the current parameter views down to each composing Operator.

void	redistribute_parameter_gradients_to_operators (vector< TensorView > &gradient_views)
	Forwards externally provided gradient views down to each Operator.

void	redistribute_states_to_operators ()
	Forwards the current state views down to each composing Operator.

Additional Inherited Members
Protected Member Functions inherited from opennn::Layer
	Layer ()=default
	Default constructor; only invoked by subclasses.

float *	link_views (float *pointer, const vector< Shape > &shapes, vector< TensorView > &views, const char *tag) const
	Builds `views` over a contiguous float buffer using `shapes`.

void	distribute_to_operators (vector< TensorView > &views, void(Operator::*link)(const vector< TensorView > &), vector< pair< Shape, Type > >(Operator::*specs)() const)
	Generic helper used by the redistribute_*_to_operators() routines.

Protected Attributes inherited from opennn::Layer
string	label = "my_layer"
	User-visible label for this layer instance (default "my_layer").

string	name = "layer"
	Canonical type name set by the subclass (e.g. "dense").

LayerType	layer_type = LayerType::Dense
	Layer type tag set by the subclass.

bool	is_trainable = true
	True if the layer has parameters that participate in training.

bool	is_first_layer = false
	True if this layer is the network's input layer.

Type	compute_dtype = Type::FP32
	Numerical type used for forward and backward computation.

vector< TensorView >	parameters
	Parameter TensorViews bound to the network's parameter arena.

vector< TensorView >	states
	State TensorViews bound to the network's state arena.

vector< unique_ptr< Layer > >	layers
	Sub-layers, when this layer is itself a composite.

Detailed Description

Token-id-to-vector lookup layer used in language models.

Inputs are token id sequences (rank-1, length sequence_length). Each id is replaced by its row in an embedding table of shape (vocabulary_size, embedding_dimension), producing a rank-2 output of shape (sequence_length, embedding_dimension).

Optional flags toggle Transformer-style sqrt(d_model) scaling and positional encoding addition; an optional dropout is applied at the end.

Constructor & Destructor Documentation

◆ Embedding()

opennn::Embedding::Embedding	(	const Shape &	input_shape = {0, 0},
		Index	embedding_dimension = 0,
		const string &	label = "embedding_layer" )

Constructs an Embedding layer.

Parameters

input_shape	Per-sample input shape; carries (sequence_length, vocabulary_size) when both are known up front.
embedding_dimension	Width of each embedding vector.
label	Human-readable label assigned to this layer.

Member Function Documentation

◆ back_propagate()

void opennn::Embedding::back_propagate	(	ForwardPropagation &	,
		BackPropagation &	,
		size_t	) const

override virtual noexcept

Backward pass: scatters output gradients into the embedding table rows referenced by the input ids.

Receives the forward intermediates, the BackPropagation buffer and this layer's index inside the network.

Reimplemented from opennn::Layer.

◆ get_embedding_dimension()

Index opennn::Embedding::get_embedding_dimension ( ) const

inline

Width of each embedding vector.

◆ get_forward_specs()

vector< pair< Shape, Type > > opennn::Embedding::get_forward_specs ( Index batch_size ) const

override virtual

Specifications of the forward intermediate buffers.

Parameters

batch_size Batch size used for sizing.

Returns: Specs for the Input and Output slots.

Reimplemented from opennn::Layer.

◆ get_input_shape()

Shape opennn::Embedding::get_input_shape ( ) const

inline override virtual

Returns the per-sample input shape (sequence_length,).

Implements opennn::Layer.

◆ get_operators()

vector< Operator * > opennn::Embedding::get_operators ( )

override virtual

Returns the active operators in pipeline order.

Returns: EmbeddingLookup, followed by Dropout when the dropout rate is non-zero.

Reimplemented from opennn::Layer.

◆ get_output_shape()

Shape opennn::Embedding::get_output_shape ( ) const

override virtual

Returns the per-sample output shape.

Returns: (sequence_length, embedding_dimension).

Implements opennn::Layer.

◆ get_sequence_length()

Index opennn::Embedding::get_sequence_length ( ) const

inline

Sequence length expected at input.

◆ get_vocabulary_size()

Index opennn::Embedding::get_vocabulary_size ( ) const

inline

Number of distinct tokens in the vocabulary.

◆ read_JSON_body()

void opennn::Embedding::read_JSON_body ( const Json * )

override virtual

Reads the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).

Reimplemented from opennn::Layer.

◆ set()

void opennn::Embedding::set	(	Index	vocabulary_size = 0,
		Index	sequence_length = 0,
		Index	embedding_dimension = 0,
		const string &	label = "embedding_layer" )

Re-initializes the layer.

Parameters

vocabulary_size	Number of distinct tokens.
sequence_length	Sequence length expected at input.
embedding_dimension	Width of each embedding vector.
label	Human-readable label.

◆ set_add_positional_encoding()

void opennn::Embedding::set_add_positional_encoding ( bool enabled )

inline

Enables or disables the addition of a sinusoidal positional encoding after lookup.

Parameters

enabled True to add positional encodings; false to skip.

◆ set_dropout_rate()

void opennn::Embedding::set_dropout_rate ( float rate )

inline

Sets the dropout rate applied at the layer output.

Parameters

rate	Probability of dropping each unit (0 disables dropout).

◆ set_scale_embedding()

void opennn::Embedding::set_scale_embedding ( bool enabled )

inline

Enables or disables Transformer-style sqrt(d_model) scaling of the embedding table output.

Parameters

enabled True to scale; false to leave embeddings as-is.

◆ write_JSON_body()

void opennn::Embedding::write_JSON_body ( JsonWriter & ) const

override virtual

Writes the layer-specific JSON body (vocabulary size, sequence length, embedding dimension, scale/positional flags, dropout).

Reimplemented from opennn::Layer.

Public Member Functions

Additional Inherited Members

Detailed Description

Constructor & Destructor Documentation

◆ Embedding()

Member Function Documentation

◆ back_propagate()

◆ get_embedding_dimension()

◆ get_forward_specs()

◆ get_input_shape()

◆ get_operators()

◆ get_output_shape()

◆ get_sequence_length()

◆ get_vocabulary_size()

◆ read_JSON_body()

◆ set()

◆ set_add_positional_encoding()

◆ set_dropout_rate()

◆ set_scale_embedding()

◆ write_JSON_body()