OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
opennn::MultiHeadProjectionOp Struct Reference

Projects (input_features) into (heads * head_dim) and reshapes for multi-head attention. More...

#include <operators.h>

Inheritance diagram for opennn::MultiHeadProjectionOp:
[legend]

Public Member Functions

void set (Index input_features, Index heads_number, Index head_dimension, Type compute_dtype)
 Configures the projection geometry.
 
vector< TensorSpec > parameter_specs () const override
 Returns the tensor specs of trainable parameters owned by this operator.
 
void link_parameters (span< const TensorView > views) override
 Binds parameter views provided by the hosting layer.
 
void link_gradients (span< const TensorView > views) override
 Binds gradient views provided by the hosting layer.
 
void set_parameters_random () override
 Initializes parameters with random values.
 
void set_parameters_glorot () override
 Initializes parameters using Glorot (Xavier) initialization.
 
void forward_propagate (ForwardPropagation &fp, size_t layer, bool is_training) noexcept override
 Runs the operator's forward computation.
 
void back_propagate (ForwardPropagation &fp, BackPropagation &bp, size_t layer) const noexcept override
 Runs the operator's backward computation, accumulating into gradient/delta buffers.
 
void apply (const TensorView &input, TensorView &head_output, float *scratch)
 Projects input and reshapes the result into per-head form in head_output.
 
void apply_delta (const TensorView &head_delta, const TensorView &input, TensorView &input_delta, bool accumulate, float *scratch) const
 Computes input_delta from per-head gradients and updates the projection weight gradient.
 
- Public Member Functions inherited from opennn::Operator
virtual ~Operator ()=default
 
virtual vector< TensorSpec > state_specs () const
 Returns the tensor specs of persistent state owned by this operator.
 
virtual void link_states (span< const TensorView >)
 Binds state views provided by the hosting layer.
 
virtual void to_JSON (JsonWriter &) const
 Serializes the operator configuration to a JSON writer.
 
virtual void from_JSON (const Json *)
 Restores the operator configuration from a JSON node.
 
virtual void load_state_from_JSON (const Json *)
 Restores persistent state (e.g. running statistics) from a JSON node.
 
virtual void destroy_cuda ()
 Releases CUDA resources owned by the operator; called from destructors.
 
TensorView get_input (ForwardPropagation &fp, size_t layer, size_t i=0) const noexcept
 
vector< TensorView > & get_inputs (ForwardPropagation &fp, size_t layer, size_t i=0) const noexcept
 
TensorView get_output (ForwardPropagation &fp, size_t layer, size_t i=0) const noexcept
 
TensorView get_output_delta (BackPropagation &bp, size_t layer, size_t i=0) const noexcept
 
TensorView get_input_delta (BackPropagation &bp, size_t layer, size_t i=0) const noexcept
 

Public Attributes

CombinationOp combination
 
Index input_features = 0
 
Index heads_number = 0
 
Index head_dimension = 0
 
Type compute_dtype = Type::FP32
 
size_t input_view_index = 0
 
vector< size_t > scratch_slots
 
vector< size_t > input_delta_slots_self
 
vector< size_t > input_delta_slots_cross
 
bool accumulate_input_delta_self = false
 
bool accumulate_input_delta_cross = false
 
- Public Attributes inherited from opennn::Operator
vector< size_t > input_slots = {0}
 
vector< size_t > output_slots = {1}
 
vector< size_t > input_delta_slots = {1}
 
vector< size_t > output_delta_slots = {0}
 

Detailed Description

Projects (input_features) into (heads * head_dim) and reshapes for multi-head attention.

Member Function Documentation

◆ apply()

void opennn::MultiHeadProjectionOp::apply ( const TensorView & input,
TensorView & head_output,
float * scratch )

Projects input and reshapes the result into per-head form in head_output.

Parameters
input: Input tokens (batch, seq, embed).
head_output: Output tensor (batch, heads, seq, head_dim).
scratch: Shared transpose-scratch buffer used during the reshape.

◆ apply_delta()

void opennn::MultiHeadProjectionOp::apply_delta ( const TensorView & head_delta,
const TensorView & input,
TensorView & input_delta,
bool accumulate,
float * scratch ) const

Computes input_delta from per-head gradients and updates the projection weight gradient.

Parameters
head_delta: Gradient w.r.t. the per-head output.
input: Forward-pass input tokens.
input_delta: Output gradient w.r.t. the input.
accumulate: If true, accumulates into input_delta instead of overwriting.
scratch: Shared transpose-scratch buffer.

◆ back_propagate()

void opennn::MultiHeadProjectionOp::back_propagate ( ForwardPropagation & fp,
BackPropagation & bp,
size_t layer ) const
override virtual noexcept

Runs the operator's backward computation, accumulating into gradient/delta buffers.

Parameters
fp: Forward propagation workspace (read-only).
bp: Back propagation workspace receiving gradients and deltas.
layer: Index of the hosting layer in the workspace.

Reimplemented from opennn::Operator.

◆ forward_propagate()

void opennn::MultiHeadProjectionOp::forward_propagate ( ForwardPropagation & fp,
size_t layer,
bool is_training )
override virtual noexcept

Runs the operator's forward computation.

Parameters
fp: Forward propagation workspace.
layer: Index of the hosting layer in the workspace.
is_training: If true, enables training-only behavior (e.g. dropout sampling).

Reimplemented from opennn::Operator.

◆ link_gradients()

void opennn::MultiHeadProjectionOp::link_gradients ( span< const TensorView > )
inline override virtual

Binds gradient views provided by the hosting layer.

Reimplemented from opennn::Operator.

◆ link_parameters()

void opennn::MultiHeadProjectionOp::link_parameters ( span< const TensorView > )
inline override virtual

Binds parameter views provided by the hosting layer.

Reimplemented from opennn::Operator.

◆ parameter_specs()

vector< TensorSpec > opennn::MultiHeadProjectionOp::parameter_specs ( ) const
inline override virtual

Returns the tensor specs of trainable parameters owned by this operator.

Reimplemented from opennn::Operator.

◆ set()

void opennn::MultiHeadProjectionOp::set ( Index input_features,
Index heads_number,
Index head_dimension,
Type compute_dtype )

Configures the projection geometry.

Parameters
input_features: Embedding dimension of the input tokens.
heads_number: Number of attention heads.
head_dimension: Per-head feature size.
compute_dtype: Dtype used for the projection matmul.

◆ set_parameters_glorot()

void opennn::MultiHeadProjectionOp::set_parameters_glorot ( )
inline override virtual

Initializes parameters using Glorot (Xavier) initialization.

Reimplemented from opennn::Operator.

◆ set_parameters_random()

void opennn::MultiHeadProjectionOp::set_parameters_random ( )
inline override virtual

Initializes parameters with random values.

Reimplemented from opennn::Operator.

Member Data Documentation

◆ accumulate_input_delta_cross

bool opennn::MultiHeadProjectionOp::accumulate_input_delta_cross = false

◆ accumulate_input_delta_self

bool opennn::MultiHeadProjectionOp::accumulate_input_delta_self = false

◆ combination

CombinationOp opennn::MultiHeadProjectionOp::combination

◆ compute_dtype

Type opennn::MultiHeadProjectionOp::compute_dtype = Type::FP32

◆ head_dimension

Index opennn::MultiHeadProjectionOp::head_dimension = 0

◆ heads_number

Index opennn::MultiHeadProjectionOp::heads_number = 0

◆ input_delta_slots_cross

vector<size_t> opennn::MultiHeadProjectionOp::input_delta_slots_cross

◆ input_delta_slots_self

vector<size_t> opennn::MultiHeadProjectionOp::input_delta_slots_self

◆ input_features

Index opennn::MultiHeadProjectionOp::input_features = 0

◆ input_view_index

size_t opennn::MultiHeadProjectionOp::input_view_index = 0

◆ scratch_slots

vector<size_t> opennn::MultiHeadProjectionOp::scratch_slots