Reshapes (batch, heads, seq, head_dim) tensors back into (batch, seq, embed), where embed = heads × head_dim; the operator has no trainable parameters.
More...
|
| void | set (Index heads_number, Index query_sequence_length, Index head_dimension, Type compute_dtype) |
| | Configures the merge geometry: the number of heads, the query sequence length, the per-head dimension, and the compute data type.
|
| |
| void | forward_propagate (ForwardPropagation &fp, size_t layer, bool is_training) noexcept override |
| | Runs the operator's forward computation.
|
| |
| void | back_propagate (ForwardPropagation &fp, BackPropagation &bp, size_t layer) const noexcept override |
| | Runs the operator's backward computation, accumulating into gradient/delta buffers.
|
| |
| virtual | ~Operator ()=default |
| |
| virtual vector< TensorSpec > | parameter_specs () const |
| | Returns the tensor specs of trainable parameters owned by this operator.
|
| |
| virtual vector< TensorSpec > | state_specs () const |
| | Returns the tensor specs of persistent state owned by this operator.
|
| |
| virtual void | link_parameters (span< const TensorView >) |
| | Binds parameter views provided by the hosting layer.
|
| |
| virtual void | link_gradients (span< const TensorView >) |
| | Binds gradient views provided by the hosting layer.
|
| |
| virtual void | link_states (span< const TensorView >) |
| | Binds state views provided by the hosting layer.
|
| |
| virtual void | set_parameters_random () |
| | Initializes parameters with random values.
|
| |
| virtual void | set_parameters_glorot () |
| | Initializes parameters using Glorot (Xavier) initialization.
|
| |
| virtual void | to_JSON (JsonWriter &) const |
| | Serializes the operator configuration to a JSON writer.
|
| |
| virtual void | from_JSON (const Json *) |
| | Restores the operator configuration from a JSON node.
|
| |
| virtual void | load_state_from_JSON (const Json *) |
| | Restores persistent state (e.g. running statistics) from a JSON node.
|
| |
| virtual void | destroy_cuda () |
| | Releases CUDA resources owned by the operator; called from destructors.
|
| |
| TensorView & | get_input (ForwardPropagation &fp, size_t layer, size_t i=0) const noexcept |
| |
| vector< TensorView > & | get_inputs (ForwardPropagation &fp, size_t layer, size_t i=0) const noexcept |
| |
| TensorView & | get_output (ForwardPropagation &fp, size_t layer, size_t i=0) const noexcept |
| |
| TensorView & | get_output_delta (BackPropagation &bp, size_t layer, size_t i=0) const noexcept |
| |
| TensorView & | get_input_delta (BackPropagation &bp, size_t layer, size_t i=0) const noexcept |
| |
Reshapes (batch, heads, seq, head_dim) tensors back into (batch, seq, embed), where embed = heads × head_dim; the operator has no trainable parameters.