documentation/reference/multihead__attention__layer_8h_source.html

//  OpenNN: Open Neural Networks Library

//  www.opennn.net

//

//  M U L T I H E A D   A T T E N T I O N   L A Y E R   C L A S S   H E A D E R

//

//  Artificial Intelligence Techniques SL

//  artelnics@artelnics.com


#pragma once


#include "layer.h"

#include "operators.h"

#include "math_utilities.h"


namespace opennn

{


/// Multi-head attention layer.
///
/// Can be built as self-attention (one input shape) or as cross-attention
/// (separate query and source shapes). Internally it owns three multi-head
/// projection operators (query, key, value), an attention operator, a merge
/// operator and an output combination operator.
class MultiHeadAttention final : public Layer
{
public:

    /// Constructs a self-attention layer where queries and keys share the same sequence.
    /// NOTE(review): the unnamed Index / string arguments are presumably the number
    /// of heads and the layer label -- confirm against the definition.
    MultiHeadAttention(const Shape& = Shape({0, 0}), Index = 0, const string& = {});

    /// Constructs a cross-attention layer with separate query and source (key/value) sequences.
    /// NOTE(review): the trailing unnamed arguments presumably mirror the
    /// self-attention constructor -- confirm against the definition.
    MultiHeadAttention(const Shape& new_query_dimensions,
                       const Shape& new_source_dimensions,
                       Index = 0,
                       const string& = {});

    /// Returns the input tensor shape.
    Shape get_input_shape() const override;

    /// Returns the output tensor shape.
    Shape get_output_shape() const override;

    /// Returns the stored query sequence length.
    Index get_query_sequence_length() const
    {
        return query_sequence_length;
    }

    /// Returns the stored source sequence length.
    Index get_source_sequence_length() const
    {
        return source_sequence_length;
    }

    /// Returns the stored embedding dimension.
    Index get_embedding_dimension() const
    {
        return embedding_dimension;
    }

    /// Returns the stored number of heads.
    Index get_heads_number() const
    {
        return heads_number;
    }

    /// Returns the per-head dimension (embedding_dimension / heads_number).
    /// Yields 0 while no heads are configured, so the division is never by zero.
    Index get_head_dimension() const
    {
        if (heads_number == 0)
            return 0;

        return Index(embedding_dimension / heads_number);
    }

    /// Returns the per-head tensor shape used internally during attention:
    /// {batch_size, heads_number, query_sequence_length, head_dimension}.
    Shape get_heads_shape(Index batch_size) const
    {
        const Index head_dimension = get_head_dimension();

        return {batch_size, heads_number, query_sequence_length, head_dimension};
    }

    /// Returns the shape used when concatenating heads back to the embedding dimension:
    /// {batch_size, query_sequence_length, heads_number, head_dimension}.
    Shape get_concat_shape(Index batch_size) const
    {
        const Index head_dimension = get_head_dimension();

        return {batch_size, query_sequence_length, heads_number, head_dimension};
    }

    /// Returns the tensor specifications used during forward propagation.
    vector<TensorSpec> get_forward_specs(Index batch_size) const override;

    /// Returns the tensor specifications used during back propagation.
    vector<TensorSpec> get_backward_specs(Index batch_size) const override;

    /// Reconfigures the layer with new sequence, embedding and head sizes and a causal flag.
    /// NOTE(review): the parameters are unnamed in this declaration; which Index
    /// maps to which size must be confirmed against the definition.
    void set(Index = 0,
             Index = 0,
             Index = 0,
             Index = 0,
             bool = false,
             const string& = "multihead_attention_layer");

    /// Updates the layer for a new input shape.
    void set_input_shape(const Shape&) override;

    /// Rebuilds projection operators when the compute dtype changes.
    void on_compute_dtype_changed() override;

    /// Sets the dropout rate applied to the attention weights
    /// by forwarding it to the attention operator.
    void set_dropout_rate(float new_dropout_rate)
    {
        attention.set_dropout_rate(new_dropout_rate);
    }

    /// Reads the layer configuration from a JSON node.
    void read_JSON_body(const Json*) override;

    /// Writes the layer configuration to a JSON writer.
    void write_JSON_body(JsonWriter&) const override;

private:

    // Layer sizes; all zero until the layer is configured.
    Index embedding_dimension = 0;
    Index heads_number = 0;
    Index query_sequence_length = 0;
    Index source_sequence_length = 0;

    // Operators that make up the layer.
    MultiHeadProjectionOp query_projection;
    MultiHeadProjectionOp key_projection;
    MultiHeadProjectionOp value_projection;
    CombinationOp output_projection;
    AttentionOp attention;
    MergeOp merge;

    // Slot identifiers for the forward pass.
    // NOTE(review): presumably indices into the list returned by
    // get_forward_specs -- the enumerator order must not change.
    enum Forward
    {
        Input,
        Query,
        Key,
        AttentionWeights,
        AttentionWeightsDropped,
        ConcatenatedAttentionOutputs,
        Value,
        TransposeScratch,
        Output
    };

    // Slot identifiers for the backward pass.
    // NOTE(review): presumably indices into the list returned by
    // get_backward_specs -- the enumerator order must not change.
    enum Backward
    {
        OutputDelta,
        InputQueryDelta,          // final input delta for the query, embed shape
        InputSourceDelta,         // final input delta for the source, embed shape
        AttentionWeightDelta,     // scratch for the unfused attention path
        ValueHeadDelta,           // dV, head shape
        ConcatenatedOutputDelta,  // dConcat, embed shape
        QueryHeadDelta,           // dQ, head shape
        KeyHeadDelta              // dK, head shape
    };
};


}


// OpenNN: Open Neural Networks Library.

// Copyright(C) 2005-2026 Artificial Intelligence Techniques, SL.

// Licensed under the GNU Lesser General Public License v2.1 or later.

opennn::JsonWriter
Definition json.h:85

opennn::Json
Definition json.h:23

opennn::Layer::Layer
Layer()=default

opennn::MultiHeadAttention::set
void set(Index=0, Index=0, Index=0, Index=0, bool=false, const string &="multihead_attention_layer")
Reconfigures the layer with new sequence lengths, embedding dimension, number of heads, and causal flag.

opennn::MultiHeadAttention::on_compute_dtype_changed
void on_compute_dtype_changed() override
Rebuilds projection operators when the compute dtype changes.

opennn::MultiHeadAttention::get_query_sequence_length
Index get_query_sequence_length() const
Definition multihead_attention_layer.h:47

opennn::MultiHeadAttention::get_heads_shape
Shape get_heads_shape(Index batch_size) const
Returns the per-head tensor shape used internally during attention.
Definition multihead_attention_layer.h:59

opennn::MultiHeadAttention::set_input_shape
void set_input_shape(const Shape &) override
Updates the layer for a new input shape.

opennn::MultiHeadAttention::get_concat_shape
Shape get_concat_shape(Index batch_size) const
Returns the shape used when concatenating heads back to the embedding dimension.
Definition multihead_attention_layer.h:65

opennn::MultiHeadAttention::get_output_shape
Shape get_output_shape() const override
Returns the output tensor shape.

opennn::MultiHeadAttention::get_heads_number
Index get_heads_number() const
Definition multihead_attention_layer.h:50

opennn::MultiHeadAttention::get_embedding_dimension
Index get_embedding_dimension() const
Definition multihead_attention_layer.h:49

opennn::MultiHeadAttention::get_forward_specs
vector< TensorSpec > get_forward_specs(Index batch_size) const override
Returns the tensor specifications used during forward propagation.

opennn::MultiHeadAttention::MultiHeadAttention
MultiHeadAttention(const Shape &new_query_dimensions, const Shape &new_source_dimensions, Index=0, const string &={})
Constructs a cross-attention layer with separate query and source (key/value) sequences.

opennn::MultiHeadAttention::get_source_sequence_length
Index get_source_sequence_length() const
Definition multihead_attention_layer.h:48

opennn::MultiHeadAttention::read_JSON_body
void read_JSON_body(const Json *) override
Reads the layer configuration from a JSON node.

opennn::MultiHeadAttention::get_input_shape
Shape get_input_shape() const override
Returns the input tensor shape.

opennn::MultiHeadAttention::set_dropout_rate
void set_dropout_rate(float new_dropout_rate)
Sets the dropout rate applied to the attention weights.
Definition multihead_attention_layer.h:91

opennn::MultiHeadAttention::MultiHeadAttention
MultiHeadAttention(const Shape &=Shape({0, 0}), Index=0, const string &={})
Constructs a self-attention layer where queries and keys share the same sequence.

opennn::MultiHeadAttention::get_head_dimension
Index get_head_dimension() const
Returns the per-head dimension (embedding_dimension / heads_number).
Definition multihead_attention_layer.h:53

opennn::MultiHeadAttention::get_backward_specs
vector< TensorSpec > get_backward_specs(Index batch_size) const override
Returns the tensor specifications used during back propagation.

opennn::MultiHeadAttention::write_JSON_body
void write_JSON_body(JsonWriter &) const override
Writes the layer configuration to a JSON writer.

layer.h

math_utilities.h

opennn
Definition adaptive_moment_estimation.h:14

operators.h

opennn::AttentionOp
Scaled dot-product attention with optional causal mask and dropout.
Definition operators.h:642

opennn::CombinationOp
Affine combination output = input * weights + bias (the dense matmul building block).
Definition operators.h:232

opennn::MergeOp
Reshapes (batch, heads, seq, head_dim) tensors back into (batch, seq, embed); no parameters.
Definition operators.h:839

opennn::MultiHeadProjectionOp
Projects (input_features) into (heads * head_dim) and reshapes for multi-head attention.
Definition operators.h:579

opennn::Shape
Fixed-capacity small-vector describing tensor dimensions (rank up to MaxRank).
Definition tensor_utilities.h:42