// OpenNN: Open Neural Networks Library
// www.opennn.net
//
//   L O N G   S H O R T   T E R M   M E M O R Y   L A Y E R   C L A S S   H E A D E R
//
// Artificial Intelligence Techniques SL
// artelnics@artelnics.com

#ifndef LONGSHORTTERMMEMORYLAYER_H
#define LONGSHORTTERMMEMORYLAYER_H

// System includes

#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>

// OpenNN includes

#include "config.h"
#include "tensor_utilities.h"
#include "layer.h"

#include "probabilistic_layer.h"
#include "perceptron_layer.h"

namespace OpenNN
{

struct LongShortTermMemoryLayerForwardPropagation;
struct LongShortTermMemoryLayerBackPropagation;

/// This class represents a layer of long short-term memory (LSTM) neurons.

class LongShortTermMemoryLayer : public Layer
{

public:

    /// Enumeration of available activation functions for the long short-term memory layer.

    enum class ActivationFunction{Threshold, SymmetricThreshold, Logistic, HyperbolicTangent,
                                  Linear, RectifiedLinear, ExponentialLinear, ScaledExponentialLinear,
                                  SoftPlus, SoftSign, HardSigmoid};

    // Constructors

    explicit LongShortTermMemoryLayer();

    explicit LongShortTermMemoryLayer(const Index&, const Index&);

    // Destructor

    virtual ~LongShortTermMemoryLayer();

    // Get methods

    bool is_empty() const;

    Index get_inputs_number() const;
    Index get_neurons_number() const;

    // Parameters

    Tensor<type, 1> get_input_biases() const;
    Tensor<type, 1> get_forget_biases() const;
    Tensor<type, 1> get_state_biases() const;
    Tensor<type, 1> get_output_biases() const;

    Tensor<type, 2> get_input_weights() const;
    Tensor<type, 2> get_forget_weights() const;
    Tensor<type, 2> get_state_weights() const;
    Tensor<type, 2> get_output_weights() const;

    Tensor<type, 2> get_input_recurrent_weights() const;
    Tensor<type, 2> get_forget_recurrent_weights() const;
    Tensor<type, 2> get_state_recurrent_weights() const;
    Tensor<type, 2> get_output_recurrent_weights() const;

    Index get_timesteps() const;

    Index get_parameters_number() const;
    Tensor<type, 1> get_parameters() const;

    // Activation functions

    const ActivationFunction& get_activation_function() const;
    const ActivationFunction& get_recurrent_activation_function() const;

    string write_activation_function() const;
    string write_recurrent_activation_function() const;

    // Display messages

    const bool& get_display() const;

    // Set methods

    void set();
    void set(const Index&, const Index&);
    void set(const LongShortTermMemoryLayer&);

    void set_default();
    void set_name(const string&);

    // Architecture

    void set_inputs_number(const Index&);
    void set_neurons_number(const Index&);
    void set_input_shape(const Tensor<Index, 1>&);

    // Parameters

    void set_input_biases(const Tensor<type, 1>&);
    void set_forget_biases(const Tensor<type, 1>&);
    void set_state_biases(const Tensor<type, 1>&);
    void set_output_biases(const Tensor<type, 1>&);

    void set_input_weights(const Tensor<type, 2>&);
    void set_forget_weights(const Tensor<type, 2>&);
    void set_state_weights(const Tensor<type, 2>&);
    void set_output_weights(const Tensor<type, 2>&);

    void set_input_recurrent_weights(const Tensor<type, 2>&);
    void set_forget_recurrent_weights(const Tensor<type, 2>&);
    void set_state_recurrent_weights(const Tensor<type, 2>&);
    void set_output_recurrent_weights(const Tensor<type, 2>&);

    void set_parameters(const Tensor<type, 1>&, const Index& = 0);

    // Activation functions

    void set_activation_function(const ActivationFunction&);
    void set_activation_function(const string&);

    void set_recurrent_activation_function(const ActivationFunction&);
    void set_recurrent_activation_function(const string&);

    void set_timesteps(const Index&);

    // Display messages

    void set_display(const bool&);

    // Parameters initialization methods

    void set_biases_constant(const type&);

    void set_forget_biases_constant(const type&);
    void set_input_biases_constant(const type&);
    void set_state_biases_constant(const type&);
    void set_output_biases_constant(const type&);

    void set_weights_constant(const type&);

    void set_forget_weights_constant(const type&);
    void set_input_weights_constant(const type&);
    void set_state_weights_constant(const type&);
    void set_output_weights_constant(const type&);

    void set_recurrent_weights_constant(const type&);

    void set_forget_recurrent_weights_constant(const type&);
    void set_input_recurrent_weights_constant(const type&);
    void set_state_recurrent_weights_constant(const type&);
    void set_output_recurrent_weights_constant(const type&);

    void set_hidden_states_constant(const type&);
    void set_cell_states_constant(const type&);

    void set_parameters_constant(const type&);

    void set_parameters_random();

    // Long short term memory layer combinations

    void calculate_combinations(const Tensor<type, 1>&,
                                const Tensor<type, 2>&,
                                const Tensor<type, 2>&,
                                const Tensor<type, 1>&,
                                Tensor<type, 1>&) const;
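
    // Note: for one time step, each gate combination above follows the standard LSTM
    // formulation (a sketch of the usual equations; the exact computation is defined in
    // the implementation file):
    //
    //   combinations = biases + inputs * weights + previous_hidden_states * recurrent_weights
    //
    // The four gate activations are then merged element-wise into the cell and hidden states:
    //
    //   cell_states   = forget_activations * previous_cell_states + input_activations * state_activations
    //   hidden_states = output_activations * activation(cell_states)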

    // Long short term memory layer activations

    void calculate_activations(const Tensor<type, 2>&, Tensor<type, 2>&) const;
    void calculate_activations(const Tensor<type, 1>&, Tensor<type, 1>&) const;
    Tensor<type, 1> calculate_activations(const Tensor<type, 1>&) const;
    void calculate_recurrent_activations(const Tensor<type, 2>&, Tensor<type, 2>&) const;
    void calculate_recurrent_activations(const Tensor<type, 1>&, Tensor<type, 1>&) const;

    // Long short term memory layer derivatives

    void calculate_activations_derivatives(const Tensor<type, 2>&, Tensor<type, 2>&, Tensor<type, 2>&) const;
    void calculate_activations_derivatives(const Tensor<type, 1>&, Tensor<type, 1>&, Tensor<type, 1>&) const;
    void calculate_recurrent_activations_derivatives(const Tensor<type, 1>&, Tensor<type, 1>&, Tensor<type, 1>&) const;

    // Long short term memory layer outputs

    Tensor<type, 2> calculate_outputs(const Tensor<type, 2>&);

    void calculate_hidden_delta(LayerForwardPropagation*,
                                LayerBackPropagation*,
                                LayerBackPropagation*) const;

    void calculate_hidden_delta_perceptron(PerceptronLayerForwardPropagation*,
                                           PerceptronLayerBackPropagation*,
                                           LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*,
                                              ProbabilisticLayerBackPropagation*,
                                              LongShortTermMemoryLayerBackPropagation*) const;

    // Forward propagate

    void forward_propagate(const Tensor<type, 2>&, LayerForwardPropagation*);

    void forward_propagate(const Tensor<type, 2>&, Tensor<type, 1>, LayerForwardPropagation*);

    // Error gradient

    void insert_gradient(LayerBackPropagation*, const Index&, Tensor<type, 1>&) const;

    void calculate_error_gradient(const Tensor<type, 2>&, LayerForwardPropagation*, LayerBackPropagation*) const;

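    // The helpers below accumulate, gate by gate, the contributions of the input weights,
    // recurrent weights and biases to the error gradient across the sequence
    // (backpropagation through time).
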
    void calculate_forget_weights_error_gradient(const Tensor<type, 2>&,
                                                 LongShortTermMemoryLayerForwardPropagation*,
                                                 LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_input_weights_error_gradient(const Tensor<type, 2>&,
                                                LongShortTermMemoryLayerForwardPropagation*,
                                                LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_state_weights_error_gradient(const Tensor<type, 2>&,
                                                LongShortTermMemoryLayerForwardPropagation*,
                                                LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_output_weights_error_gradient(const Tensor<type, 2>&,
                                                 LongShortTermMemoryLayerForwardPropagation*,
                                                 LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_forget_recurrent_weights_error_gradient(const Tensor<type, 2>&,
                                                           LongShortTermMemoryLayerForwardPropagation*,
                                                           LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_input_recurrent_weights_error_gradient(const Tensor<type, 2>&,
                                                          LongShortTermMemoryLayerForwardPropagation*,
                                                          LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_state_recurrent_weights_error_gradient(const Tensor<type, 2>&,
                                                          LongShortTermMemoryLayerForwardPropagation*,
                                                          LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_output_recurrent_weights_error_gradient(const Tensor<type, 2>&,
                                                           LongShortTermMemoryLayerForwardPropagation*,
                                                           LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_forget_biases_error_gradient(const Tensor<type, 2>&,
                                                LongShortTermMemoryLayerForwardPropagation*,
                                                LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_input_biases_error_gradient(const Tensor<type, 2>&,
                                               LongShortTermMemoryLayerForwardPropagation*,
                                               LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_state_biases_error_gradient(const Tensor<type, 2>&,
                                               LongShortTermMemoryLayerForwardPropagation*,
                                               LongShortTermMemoryLayerBackPropagation*) const;

    void calculate_output_biases_error_gradient(const Tensor<type, 2>&,
                                                LongShortTermMemoryLayerForwardPropagation*,
                                                LongShortTermMemoryLayerBackPropagation*) const;

    // Expression methods

    string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const;
    string write_recurrent_activation_function_expression() const;
    string write_activation_function_expression() const;

    string write_expression_c() const;
    string write_combinations_c() const;

    string write_expression_python() const;
    string write_combinations_python() const;

    // Serialization methods

    void from_XML(const tinyxml2::XMLDocument&);

    void write_XML(tinyxml2::XMLPrinter&) const;

protected:

    Index timesteps = 3;

    Tensor<type, 1> input_biases;
    Tensor<type, 1> forget_biases;
    Tensor<type, 1> state_biases;
    Tensor<type, 1> output_biases;

    Tensor<type, 2> input_weights;
    Tensor<type, 2> forget_weights;
    Tensor<type, 2> state_weights;
    Tensor<type, 2> output_weights;

    Tensor<type, 2> forget_recurrent_weights;
    Tensor<type, 2> input_recurrent_weights;
    Tensor<type, 2> state_recurrent_weights;
    Tensor<type, 2> output_recurrent_weights;

    /// Activation function variable.

    ActivationFunction activation_function = ActivationFunction::HyperbolicTangent;
    ActivationFunction recurrent_activation_function = ActivationFunction::HardSigmoid;

    Index batch;
    Index variables;

    Tensor<type, 1> hidden_states;
    Tensor<type, 1> cell_states;

    /// Display messages to screen.

    bool display = true;

#ifdef OPENNN_CUDA
    #include "../../opennn-cuda/opennn-cuda/long_short_term_memory_layer_cuda.h"
#endif

};
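
// A minimal usage sketch (an assumption of typical use, not taken from the original header;
// the sizes and data below are hypothetical, but every call appears in the declarations above):
//
//     using namespace OpenNN;
//
//     LongShortTermMemoryLayer lstm_layer(4, 8);              // 4 inputs, 8 LSTM neurons
//     lstm_layer.set_timesteps(3);
//     lstm_layer.set_activation_function("HyperbolicTangent");
//
//     Tensor<type, 2> inputs(10, 4);                          // 10 samples, 4 inputs
//     inputs.setRandom();
//
//     const Tensor<type, 2> outputs = lstm_layer.calculate_outputs(inputs);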

struct LongShortTermMemoryLayerForwardPropagation : LayerForwardPropagation
{
    explicit LongShortTermMemoryLayerForwardPropagation() : LayerForwardPropagation()
    {
    }

    explicit LongShortTermMemoryLayerForwardPropagation(const Index& new_batch_samples_number, Layer* new_layer_pointer)
        : LayerForwardPropagation()
    {
        set(new_batch_samples_number, new_layer_pointer);
    }


    void set(const Index& new_batch_samples_number, Layer* new_layer_pointer)
    {
        layer_pointer = new_layer_pointer;

        const Index inputs_number = layer_pointer->get_inputs_number();
        const Index neurons_number = layer_pointer->get_neurons_number();

        batch_samples_number = new_batch_samples_number;

        previous_hidden_state_activations.resize(neurons_number);
        previous_cell_state_activations.resize(neurons_number);

        current_inputs.resize(inputs_number);

        current_forget_combinations.resize(neurons_number);
        current_input_combinations.resize(neurons_number);
        current_state_combinations.resize(neurons_number);
        current_output_combinations.resize(neurons_number);

        current_forget_activations.resize(neurons_number);
        current_input_activations.resize(neurons_number);
        current_state_activations.resize(neurons_number);
        current_output_activations.resize(neurons_number);

        current_cell_state_activations.resize(neurons_number);

        current_forget_activations_derivatives.resize(neurons_number);
        current_input_activations_derivatives.resize(neurons_number);
        current_state_activations_derivatives.resize(neurons_number);
        current_output_activations_derivatives.resize(neurons_number);
        current_hidden_states_derivatives.resize(neurons_number);

        forget_activations.resize(batch_samples_number, neurons_number);
        input_activations.resize(batch_samples_number, neurons_number);
        state_activations.resize(batch_samples_number, neurons_number);
        output_activations.resize(batch_samples_number, neurons_number);
        cell_states_activations.resize(batch_samples_number, neurons_number);
        hidden_states_activations.resize(batch_samples_number, neurons_number);

        forget_activations_derivatives.resize(batch_samples_number, neurons_number);
        input_activations_derivatives.resize(batch_samples_number, neurons_number);
        state_activations_derivatives.resize(batch_samples_number, neurons_number);
        output_activations_derivatives.resize(batch_samples_number, neurons_number);
        cell_states_activations_derivatives.resize(batch_samples_number, neurons_number);
        hidden_states_activations_derivatives.resize(batch_samples_number, neurons_number);

        combinations.resize(batch_samples_number, neurons_number);
        activations.resize(batch_samples_number, neurons_number);
    }

    void print() const
    {
    }

    Tensor<type, 2> combinations;
    Tensor<type, 2> activations;

    Tensor<type, 1> previous_hidden_state_activations;
    Tensor<type, 1> previous_cell_state_activations;

    Tensor<type, 1> current_inputs;

    Tensor<type, 1> current_forget_combinations;
    Tensor<type, 1> current_input_combinations;
    Tensor<type, 1> current_state_combinations;
    Tensor<type, 1> current_output_combinations;

    Tensor<type, 1> current_forget_activations;
    Tensor<type, 1> current_input_activations;
    Tensor<type, 1> current_state_activations;
    Tensor<type, 1> current_output_activations;

    Tensor<type, 1> current_forget_activations_derivatives;
    Tensor<type, 1> current_input_activations_derivatives;
    Tensor<type, 1> current_state_activations_derivatives;
    Tensor<type, 1> current_output_activations_derivatives;

    Tensor<type, 1> current_hidden_states_derivatives;

    Tensor<type, 1> current_cell_state_activations;

    Tensor<type, 2, RowMajor> forget_activations;
    Tensor<type, 2, RowMajor> input_activations;
    Tensor<type, 2, RowMajor> state_activations;
    Tensor<type, 2, RowMajor> output_activations;
    Tensor<type, 2, RowMajor> cell_states_activations;
    Tensor<type, 2, RowMajor> hidden_states_activations;

    Tensor<type, 2, RowMajor> forget_activations_derivatives;
    Tensor<type, 2, RowMajor> input_activations_derivatives;
    Tensor<type, 2, RowMajor> state_activations_derivatives;
    Tensor<type, 2, RowMajor> output_activations_derivatives;
    Tensor<type, 2, RowMajor> cell_states_activations_derivatives;
    Tensor<type, 2, RowMajor> hidden_states_activations_derivatives;
};

struct LongShortTermMemoryLayerBackPropagation : LayerBackPropagation
{
    explicit LongShortTermMemoryLayerBackPropagation() : LayerBackPropagation()
    {
    }

    explicit LongShortTermMemoryLayerBackPropagation(const Index& new_batch_samples_number, Layer* new_layer_pointer)
        : LayerBackPropagation()
    {
        set(new_batch_samples_number, new_layer_pointer);
    }


    void set(const Index& new_batch_samples_number, Layer* new_layer_pointer)
    {
        layer_pointer = new_layer_pointer;
        batch_samples_number = new_batch_samples_number;

        const Index neurons_number = layer_pointer->get_neurons_number();
        const Index inputs_number = layer_pointer->get_inputs_number();

        current_layer_deltas.resize(neurons_number);

        forget_weights_derivatives.resize(inputs_number*neurons_number);
        input_weights_derivatives.resize(inputs_number*neurons_number);
        state_weights_derivatives.resize(inputs_number*neurons_number);
        output_weights_derivatives.resize(inputs_number*neurons_number);

        forget_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
        input_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
        state_recurrent_weights_derivatives.resize(neurons_number*neurons_number);
        output_recurrent_weights_derivatives.resize(neurons_number*neurons_number);

        forget_biases_derivatives.resize(neurons_number);
        input_biases_derivatives.resize(neurons_number);
        state_biases_derivatives.resize(neurons_number);
        output_biases_derivatives.resize(neurons_number);

        delta.resize(batch_samples_number, neurons_number);

        input_combinations_biases_derivatives.resize(neurons_number, neurons_number);
        forget_combinations_biases_derivatives.resize(neurons_number, neurons_number);
        state_combinations_biases_derivatives.resize(neurons_number, neurons_number);
        output_combinations_biases_derivatives.resize(neurons_number, neurons_number);

        hidden_states_biases_derivatives.resize(neurons_number, neurons_number);
        cell_state_biases_derivatives.resize(neurons_number, neurons_number);

        input_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
        forget_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
        state_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
        output_combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);

        hidden_states_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
        cell_state_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);

        input_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
        forget_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
        state_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
        output_combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);

        hidden_states_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
        cell_state_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
    }

    void print() const
    {
    }

    Tensor<type, 1> current_layer_deltas;

    Tensor<type, 2> delta;

    Tensor<type, 1> forget_weights_derivatives;
    Tensor<type, 1> input_weights_derivatives;
    Tensor<type, 1> state_weights_derivatives;
    Tensor<type, 1> output_weights_derivatives;

    Tensor<type, 1> forget_recurrent_weights_derivatives;
    Tensor<type, 1> input_recurrent_weights_derivatives;
    Tensor<type, 1> state_recurrent_weights_derivatives;
    Tensor<type, 1> output_recurrent_weights_derivatives;

    Tensor<type, 1> forget_biases_derivatives;
    Tensor<type, 1> input_biases_derivatives;
    Tensor<type, 1> state_biases_derivatives;
    Tensor<type, 1> output_biases_derivatives;

    Tensor<type, 2> input_combinations_biases_derivatives;
    Tensor<type, 2> forget_combinations_biases_derivatives;
    Tensor<type, 2> state_combinations_biases_derivatives;
    Tensor<type, 2> output_combinations_biases_derivatives;

    Tensor<type, 2> hidden_states_biases_derivatives;
    Tensor<type, 2> cell_state_biases_derivatives;

    Tensor<type, 2> input_combinations_weights_derivatives;
    Tensor<type, 2> forget_combinations_weights_derivatives;
    Tensor<type, 2> state_combinations_weights_derivatives;
    Tensor<type, 2> output_combinations_weights_derivatives;

    Tensor<type, 2> hidden_states_weights_derivatives;
    Tensor<type, 2> cell_state_weights_derivatives;

    Tensor<type, 2> input_combinations_recurrent_weights_derivatives;
    Tensor<type, 2> forget_combinations_recurrent_weights_derivatives;
    Tensor<type, 2> state_combinations_recurrent_weights_derivatives;
    Tensor<type, 2> output_combinations_recurrent_weights_derivatives;

    Tensor<type, 2> hidden_states_recurrent_weights_derivatives;
    Tensor<type, 2> cell_state_recurrent_weights_derivatives;
};


}

#endif


// OpenNN: Open Neural Networks Library.
// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software

// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA