9#ifndef LONGSHORTTERMMEMORYLAYER_H
10#define LONGSHORTTERMMEMORYLAYER_H
24#include "tensor_utilities.h"
27#include "probabilistic_layer.h"
28#include "perceptron_layer.h"
33struct LongShortTermMemoryLayerForwardPropagation;
34struct LongShortTermMemoryLayerBackPropagation;
48 Linear, RectifiedLinear, ExponentialLinear, ScaledExponentialLinear,
49 SoftPlus, SoftSign, HardSigmoid};
// Const query taking no arguments and returning a bool.
63 bool is_empty()
const;
// Two-Index overload of set().
// NOTE(review): parameter names are omitted in this declaration - presumably the
// inputs number followed by the neurons number; confirm against the definition.
104 void set(
const Index&,
const Index&);
// Setter taking a name by const string reference.
108 void set_name(
const string&);
// Const member taking four read-only tensors (rank 1, rank 2, rank 2, rank 1) plus a
// trailing non-const rank-1 tensor, which is the only writable argument.
// NOTE(review): parameter names are omitted in this declaration - presumably inputs,
// weights, recurrent weights and biases, with the combinations written to the last
// argument; confirm against the definition.
181 void calculate_combinations(
const Tensor<type, 1>&,
182 const Tensor<type, 2>&,
183 const Tensor<type, 2>&,
184 const Tensor<type, 1>&,
185 Tensor<type, 1>&)
const;
// Activation overloads. Each void overload takes a read-only tensor followed by a
// non-const tensor of the same rank (rank 2 or rank 1); the single-argument rank-1
// overload returns its result by value instead.
// NOTE(review): presumably the first argument holds combinations and the second
// receives the activations - parameter names are not shown; confirm.
189 void calculate_activations(
const Tensor<type, 2>&, Tensor<type, 2>&)
const;
190 void calculate_activations(
const Tensor<type, 1>&, Tensor<type, 1>&)
const;
191 Tensor<type, 1> calculate_activations(
const Tensor<type, 1>&)
const;
// Same two-argument shape as above, declared for the recurrent activation in rank-2
// and rank-1 forms.
192 void calculate_recurrent_activations(
const Tensor<type, 2>&, Tensor<type, 2>&)
const;
193 void calculate_recurrent_activations(
const Tensor<type, 1>&, Tensor<type, 1>&)
const;
// Derivative variants: one read-only tensor followed by two non-const tensors of the
// same rank.
// NOTE(review): presumably the two writable arguments receive the activations and
// their derivatives, in that order; confirm against the definitions.
197 void calculate_activations_derivatives(
const Tensor<type, 2>&, Tensor<type, 2>&, Tensor<type, 2>&)
const;
198 void calculate_activations_derivatives(
const Tensor<type, 1>&, Tensor<type, 1>&, Tensor<type, 1>&)
const;
// Only a rank-1 overload of the recurrent derivative variant is visible in this excerpt.
199 void calculate_recurrent_activations_derivatives(
const Tensor<type, 1>&, Tensor<type, 1>&, Tensor<type, 1>&)
const;
// Non-const member: takes a rank-2 tensor by const reference and returns a rank-2
// tensor by value.
203 Tensor<type, 2> calculate_outputs(
const Tensor<type, 2>&);
231 void calculate_forget_weights_error_gradient(
const Tensor<type, 2>&,
235 void calculate_input_weights_error_gradient(
const Tensor<type, 2>&,
239 void calculate_state_weights_error_gradient(
const Tensor<type, 2>&,
243 void calculate_output_weights_error_gradient(
const Tensor<type, 2>&,
247 void calculate_forget_recurrent_weights_error_gradient(
const Tensor<type, 2>&,
251 void calculate_input_recurrent_weights_error_gradient(
const Tensor<type, 2>&,
255 void calculate_state_recurrent_weights_error_gradient(
const Tensor<type, 2>&,
259 void calculate_output_recurrent_weights_error_gradient(
const Tensor<type, 2>&,
263 void calculate_forget_biases_error_gradient(
const Tensor<type, 2>&,
267 void calculate_input_biases_error_gradient(
const Tensor<type, 2>&,
271 void calculate_state_biases_error_gradient(
const Tensor<type, 2>&,
275 void calculate_output_biases_error_gradient(
const Tensor<type, 2>&,
// Const member returning a string built from two rank-1 tensors of strings.
// NOTE(review): presumably the input names followed by the output names - parameter
// names are not shown in this declaration; confirm.
281 string write_expression(
const Tensor<string, 1>&,
const Tensor<string, 1>&)
const;
// Const, argument-free members returning a string for the recurrent activation
// function and for the activation function respectively.
282 string write_recurrent_activation_function_expression()
const;
283 string write_activation_function_expression()
const;
// Const, argument-free members returning a string; the name suffixes indicate C and
// Python output respectively.
285 string write_expression_c()
const;
288 string write_expression_python()
const;
// Bias vectors (rank 1), one each for the input, forget, state and output components.
301 Tensor<type, 1> input_biases;
302 Tensor<type, 1> forget_biases;
303 Tensor<type, 1> state_biases;
304 Tensor<type, 1> output_biases;
// Weight matrices (rank 2), one each for the input, forget, state and output components.
306 Tensor<type, 2> input_weights;
307 Tensor<type, 2> forget_weights;
308 Tensor<type, 2> state_weights;
309 Tensor<type, 2> output_weights;
// Recurrent weight matrices (rank 2), one per component.
311 Tensor<type, 2> forget_recurrent_weights;
312 Tensor<type, 2> input_recurrent_weights;
313 Tensor<type, 2> state_recurrent_weights;
314 Tensor<type, 2> output_recurrent_weights;
// Rank-1 hidden-state and cell-state vectors held by the layer itself.
324 Tensor<type, 1> hidden_states;
325 Tensor<type, 1> cell_states;
332 #include "../../opennn-cuda/opennn-cuda/long_short_term_memory_layer_cuda.h"
347 set(new_batch_samples_number, new_layer_pointer);
// Binds this forward-propagation workspace to a layer and resizes all of its buffers.
// new_batch_samples_number: stored in batch_samples_number; used as the first
//                           dimension of every rank-2 buffer resized below.
// new_layer_pointer:        stored in layer_pointer and queried for its neurons number.
351 void set(
const Index& new_batch_samples_number,
Layer* new_layer_pointer)
353 layer_pointer = new_layer_pointer;
// NOTE(review): layer_pointer is dereferenced here; no null check is visible in this excerpt.
356 const Index neurons_number = layer_pointer->get_neurons_number();
358 batch_samples_number = new_batch_samples_number;
// Rank-1 vectors for the previous hidden and cell state, one entry per neuron.
360 previous_hidden_state_activations.resize(neurons_number);
361 previous_cell_state_activations.resize(neurons_number);
// NOTE(review): inputs_number is used here but its definition is not visible in this
// excerpt (the corresponding original line is missing) - confirm where it is obtained.
363 current_inputs.resize(inputs_number);
// Rank-1 "current_*" scratch vectors, one entry per neuron: combinations ...
365 current_forget_combinations.resize(neurons_number);
366 current_input_combinations.resize(neurons_number);
367 current_state_combinations.resize(neurons_number);
368 current_output_combinations.resize(neurons_number);
// ... activations ...
370 current_forget_activations.resize(neurons_number);
371 current_input_activations.resize(neurons_number);
372 current_state_activations.resize(neurons_number);
373 current_output_activations.resize(neurons_number);
375 current_cell_state_activations.resize(neurons_number);
// ... and activation derivatives.
377 current_forget_activations_derivatives.resize(neurons_number);
378 current_input_activations_derivatives.resize(neurons_number);
379 current_state_activations_derivatives.resize(neurons_number);
380 current_output_activations_derivatives.resize(neurons_number);
381 current_hidden_states_derivatives.resize(neurons_number);
// Batch-wide matrices sized (batch_samples_number, neurons_number): activations ...
383 forget_activations.resize(batch_samples_number, neurons_number);
384 input_activations.resize(batch_samples_number, neurons_number);
385 state_activations.resize(batch_samples_number, neurons_number);
386 output_activations.resize(batch_samples_number, neurons_number);
387 cell_states_activations.resize(batch_samples_number, neurons_number);
388 hidden_states_activations.resize(batch_samples_number, neurons_number);
// ... and their derivatives, with the same dimensions.
390 forget_activations_derivatives.resize(batch_samples_number, neurons_number);
391 input_activations_derivatives.resize(batch_samples_number, neurons_number);
392 state_activations_derivatives.resize(batch_samples_number, neurons_number);
393 output_activations_derivatives.resize(batch_samples_number, neurons_number);
394 cell_states_activations_derivatives.resize(batch_samples_number, neurons_number);
395 hidden_states_activations_derivatives.resize(batch_samples_number, neurons_number);
// Layer-level combinations and activations, also (batch_samples_number, neurons_number).
397 combinations.resize(batch_samples_number, neurons_number);
398 activations.resize(batch_samples_number, neurons_number);
// Rank-2 combinations and activations buffers (default storage order).
406 Tensor<type, 2> combinations;
407 Tensor<type, 2> activations;
// Rank-1 vectors holding the previous hidden-state and cell-state activations.
409 Tensor<type, 1> previous_hidden_state_activations;
410 Tensor<type, 1> previous_cell_state_activations;
// Rank-1 "current_*" working vectors: inputs, then per-component combinations,
// activations and activation derivatives.
412 Tensor<type, 1> current_inputs;
414 Tensor<type, 1> current_forget_combinations;
415 Tensor<type, 1> current_input_combinations;
416 Tensor<type, 1> current_state_combinations;
417 Tensor<type, 1> current_output_combinations;
419 Tensor<type, 1> current_forget_activations;
420 Tensor<type, 1> current_input_activations;
421 Tensor<type, 1> current_state_activations;
422 Tensor<type, 1> current_output_activations;
424 Tensor<type, 1> current_forget_activations_derivatives;
425 Tensor<type, 1> current_input_activations_derivatives;
426 Tensor<type, 1> current_state_activations_derivatives;
427 Tensor<type, 1> current_output_activations_derivatives;
429 Tensor<type, 1> current_hidden_states_derivatives;
431 Tensor<type, 1> current_cell_state_activations;
// Rank-2 activation buffers, explicitly stored RowMajor (unlike the two
// default-layout tensors at the top of this group).
433 Tensor<type, 2, RowMajor> forget_activations;
434 Tensor<type, 2, RowMajor> input_activations;
435 Tensor<type, 2, RowMajor> state_activations;
436 Tensor<type, 2, RowMajor> output_activations;
437 Tensor<type, 2, RowMajor> cell_states_activations;
438 Tensor<type, 2, RowMajor> hidden_states_activations;
// Rank-2 RowMajor buffers for the corresponding activation derivatives.
440 Tensor<type, 2, RowMajor> forget_activations_derivatives;
441 Tensor<type, 2, RowMajor> input_activations_derivatives;
442 Tensor<type, 2, RowMajor> state_activations_derivatives;
443 Tensor<type, 2, RowMajor> output_activations_derivatives;
444 Tensor<type, 2, RowMajor> cell_states_activations_derivatives;
445 Tensor<type, 2, RowMajor> hidden_states_activations_derivatives;
459 set(new_batch_samples_number, new_layer_pointer);
// Binds this back-propagation workspace to a layer and resizes all derivative buffers.
// new_batch_samples_number: stored in batch_samples_number; first dimension of delta.
// new_layer_pointer:        stored in layer_pointer and queried for its neurons number.
463 void set(
const Index& new_batch_samples_number,
Layer* new_layer_pointer)
465 layer_pointer = new_layer_pointer;
466 batch_samples_number = new_batch_samples_number;
// NOTE(review): layer_pointer is dereferenced here; no null check is visible in this excerpt.
468 const Index neurons_number = layer_pointer->get_neurons_number();
471 current_layer_deltas.resize(neurons_number);
// Weight derivatives are rank-1 vectors of inputs_number*neurons_number entries.
// NOTE(review): inputs_number is used below but its definition is not visible in this
// excerpt (the corresponding original line is missing) - confirm where it is obtained.
473 forget_weights_derivatives.resize(inputs_number*neurons_number);
474 input_weights_derivatives.resize(inputs_number*neurons_number);
475 state_weights_derivatives.resize(inputs_number*neurons_number);
476 output_weights_derivatives.resize(inputs_number*neurons_number);
// Recurrent weight derivatives: rank-1 vectors of neurons_number*neurons_number entries.
478 forget_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
479 input_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
480 state_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
481 output_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
// Bias derivatives: one entry per neuron.
483 forget_biases_derivatives.resize(neurons_number);
484 input_biases_derivatives.resize(neurons_number);
485 state_biases_derivatives.resize(neurons_number);
486 output_biases_derivatives.resize(neurons_number);
488 delta.resize(batch_samples_number, neurons_number);
// Matrices with respect to the biases: (neurons_number, neurons_number).
490 input_combinations_biases_derivatives.resize(neurons_number, neurons_number);
491 forget_combinations_biases_derivatives.resize(neurons_number, neurons_number);
492 state_combinations_biases_derivatives.resize(neurons_number, neurons_number);
493 output_combinations_biases_derivatives.resize(neurons_number, neurons_number);
495 hidden_states_biases_derivatives.resize(neurons_number, neurons_number);
496 cell_state_biases_derivatives.resize(neurons_number, neurons_number);
// Matrices with respect to the weights: (inputs_number*neurons_number, neurons_number).
498 input_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
499 forget_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
500 state_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
501 output_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
503 hidden_states_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
504 cell_state_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
// Matrices with respect to the recurrent weights:
// (neurons_number*neurons_number, neurons_number).
506 input_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
507 forget_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
508 state_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
509 output_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
511 hidden_states_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
512 cell_state_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
// Rank-1 deltas vector and the rank-2 delta matrix.
520 Tensor<type, 1> current_layer_deltas;
522 Tensor<type, 2> delta;
// Weight derivatives, stored flattened as rank-1 tensors (one per component).
524 Tensor<type, 1> forget_weights_derivatives;
525 Tensor<type, 1> input_weights_derivatives;
526 Tensor<type, 1> state_weights_derivatives;
527 Tensor<type, 1> output_weights_derivatives;
// Recurrent weight derivatives, also stored as rank-1 tensors.
529 Tensor<type, 1> forget_recurrent_weights_derivatives;
530 Tensor<type, 1> input_recurrent_weights_derivatives;
531 Tensor<type, 1> state_recurrent_weights_derivatives;
532 Tensor<type, 1> output_recurrent_weights_derivatives;
// Bias derivatives (rank 1).
534 Tensor<type, 1> forget_biases_derivatives;
535 Tensor<type, 1> input_biases_derivatives;
536 Tensor<type, 1> state_biases_derivatives;
537 Tensor<type, 1> output_biases_derivatives;
// Rank-2 derivatives of the combinations, hidden states and cell state with respect
// to the biases.
539 Tensor<type, 2> input_combinations_biases_derivatives;
540 Tensor<type, 2> forget_combinations_biases_derivatives;
541 Tensor<type, 2> state_combinations_biases_derivatives;
542 Tensor<type, 2> output_combinations_biases_derivatives;
544 Tensor<type, 2> hidden_states_biases_derivatives;
545 Tensor<type, 2> cell_state_biases_derivatives;
// Rank-2 derivatives of the same quantities with respect to the weights.
547 Tensor<type, 2> input_combinations_weights_derivatives;
548 Tensor<type, 2> forget_combinations_weights_derivatives;
549 Tensor<type, 2> state_combinations_weights_derivatives;
550 Tensor<type, 2> output_combinations_weights_derivatives;
552 Tensor<type, 2> hidden_states_weights_derivatives;
553 Tensor<type, 2> cell_state_weights_derivatives;
// Rank-2 derivatives of the same quantities with respect to the recurrent weights.
555 Tensor<type, 2> input_combinations_recurrent_weights_derivatives;
556 Tensor<type, 2> forget_combinations_recurrent_weights_derivatives;
557 Tensor<type, 2> state_combinations_recurrent_weights_derivatives;
558 Tensor<type, 2> output_combinations_recurrent_weights_derivatives;
560 Tensor<type, 2> hidden_states_recurrent_weights_derivatives;
561 Tensor<type, 2> cell_state_recurrent_weights_derivatives;
This abstract class represents the concept of a layer of neurons in OpenNN.
virtual Index get_inputs_number() const
Returns the number of inputs.
void set_recurrent_activation_function(const ActivationFunction &)
string write_activation_function() const
void set_state_weights(const Tensor< type, 2 > &)
Tensor< type, 2 > get_forget_weights() const
Tensor< type, 2 > get_output_recurrent_weights() const
void set_parameters_constant(const type &)
Tensor< type, 2 > get_output_weights() const
void set_input_weights_constant(const type &)
const bool & get_display() const
Tensor< type, 1 > get_state_biases() const
Index get_inputs_number() const
Returns the number of inputs to the layer.
Tensor< type, 2 > get_input_weights() const
void set_forget_recurrent_weights(const Tensor< type, 2 > &)
void set_biases_constant(const type &)
string write_expression(const Tensor< string, 1 > &, const Tensor< string, 1 > &) const
void set_output_weights_constant(const type &)
LongShortTermMemoryLayer()
void set_output_biases_constant(const type &)
void set_weights_constant(const type &)
void set_forget_weights_constant(const type &)
void set_output_recurrent_weights_constant(const type &)
bool display
Display messages to screen.
void set_forget_biases_constant(const type &)
Tensor< type, 2 > get_forget_recurrent_weights() const
ActivationFunction
Enumeration of the available activation functions for the long short-term memory layer.
void set_state_biases_constant(const type &)
void set_input_biases(const Tensor< type, 1 > &)
Tensor< type, 2 > get_state_weights() const
void set_activation_function(const ActivationFunction &)
void set_recurrent_weights_constant(const type &)
Tensor< type, 2 > get_input_recurrent_weights() const
Tensor< type, 1 > get_forget_biases() const
void set_input_recurrent_weights_constant(const type &)
void set_input_weights(const Tensor< type, 2 > &)
void set_state_recurrent_weights(const Tensor< type, 2 > &)
void set_cell_states_constant(const type &)
void set_forget_biases(const Tensor< type, 1 > &)
Tensor< type, 2 > get_state_recurrent_weights() const
string write_recurrent_activation_function() const
void set_input_biases_constant(const type &)
void set_inputs_number(const Index &)
Tensor< type, 1 > get_output_biases() const
void set_timesteps(const Index &)
string write_combinations_c() const
void set_forget_recurrent_weights_constant(const type &)
void set_state_biases(const Tensor< type, 1 > &)
void set_output_recurrent_weights(const Tensor< type, 2 > &)
Index get_neurons_number() const
Returns the size of the neurons vector.
void set_input_shape(const Tensor< Index, 1 > &)
void set_input_recurrent_weights(const Tensor< type, 2 > &)
Index get_timesteps() const
Returns the number of timesteps.
void set_forget_weights(const Tensor< type, 2 > &)
void set_parameters_random()
void set_output_weights(const Tensor< type, 2 > &)
virtual ~LongShortTermMemoryLayer()
const LongShortTermMemoryLayer::ActivationFunction & get_activation_function() const
Returns the activation function of the layer.
void set_state_weights_constant(const type &)
void set_parameters(const Tensor< type, 1 > &, const Index &=0)
void set_display(const bool &)
void set_hidden_states_constant(const type &)
void set_output_biases(const Tensor< type, 1 > &)
Index get_parameters_number() const
Returns the number of parameters (biases, weights, recurrent weights) of the layer.
ActivationFunction activation_function
Activation function variable.
const LongShortTermMemoryLayer::ActivationFunction & get_recurrent_activation_function() const
Returns the recurrent activation function of the layer.
void set_state_recurrent_weights_constant(const type &)
void set_neurons_number(const Index &)
string write_combinations_python() const
Tensor< type, 1 > get_input_biases() const
Tensor< type, 1 > get_parameters() const
LayerBackPropagation()
Default constructor.
LayerForwardPropagation()
Default constructor.