// OpenNN: Open Neural Networks Library
// www.opennn.net
//
// R E C U R R E N T   L A Y E R   C L A S S   H E A D E R
//
// Artificial Intelligence Techniques SL
// artelnics@artelnics.com

#ifndef RECURRENTLAYER_H
#define RECURRENTLAYER_H

// System includes

#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>

// OpenNN includes

#include "config.h"
#include "tensor_utilities.h"
#include "layer.h"

#include "probabilistic_layer.h"
#include "perceptron_layer.h"

namespace OpenNN
{

struct RecurrentLayerForwardPropagation;
struct RecurrentLayerBackPropagation;


#ifdef OPENNN_CUDA
    #include "../../opennn-cuda/opennn-cuda/struct_recurrent_layer_cuda.h"
#endif

44class RecurrentLayer : public Layer
45{
46
47public:
48
50
51 enum class ActivationFunction{Threshold, SymmetricThreshold, Logistic, HyperbolicTangent,
52 Linear, RectifiedLinear, ExponentialLinear,
53 ScaledExponentialLinear, SoftPlus, SoftSign, HardSigmoid};
54
55 // Constructors
56
57 explicit RecurrentLayer();
58
59 explicit RecurrentLayer(const Index&, const Index&);
60
61 // Destructor
62
63 virtual ~RecurrentLayer();
64
65 // Get methods
66
67 bool is_empty() const;
68
69 Index get_inputs_number() const;
70 Index get_neurons_number() const;
71
72 const Tensor<type, 1>& get_hidden_states() const;
73
74 // Parameters
75
76 Index get_timesteps() const;
77
78 Tensor<type, 1> get_biases() const;
79 const Tensor<type, 2>& get_input_weights() const;
80 const Tensor<type, 2>& get_recurrent_weights() const;
81
82 Index get_biases_number() const;
83 Index get_input_weights_number() const;
84 Index get_recurrent_weights_number() const;
85
86 Index get_parameters_number() const;
87 Tensor<type, 1> get_parameters() const;
88
89 Tensor<type, 2> get_biases(const Tensor<type, 1>&) const;
90 Tensor<type, 2> get_input_weights(const Tensor<type, 1>&) const;
91 Tensor<type, 2> get_recurrent_weights(const Tensor<type, 1>&) const;
92
93 // Activation functions
94
96
97 string write_activation_function() const;
98
99 // Display messages
100
101 const bool& get_display() const;
102
103 // Set methods
104
105 void set();
106 void set(const Index&, const Index&);
107 void set(const RecurrentLayer&);
108
109 void set_default();
110
111 // Architecture
112
113 void set_inputs_number(const Index&);
114 void set_neurons_number(const Index&);
115 void set_input_shape(const Tensor<Index, 1>&);
116
117 // Parameters
118
119 void set_timesteps(const Index&);
120
121 void set_biases(const Tensor<type, 1>&);
122
123 void set_input_weights(const Tensor<type, 2>&);
124
125 void set_recurrent_weights(const Tensor<type, 2>&);
126
127 void set_parameters(const Tensor<type, 1>&, const Index& = 0);
128
129 // Activation functions
130
132 void set_activation_function(const string&);
133
134 // Display messages
135
136 void set_display(const bool&);
137
138 // Parameters initialization methods
139
140 void set_hidden_states_constant(const type&);
141
142 void set_biases_constant(const type&);
143
144 void set_input_weights_constant(const type&);
145 void set_recurrent_weights_constant(const type&);
146 void initialize_input_weights_Glorot(const type&, const type&);
147
148 void set_parameters_constant(const type&);
149
151
152 // neuron layer combinations
153
154 void calculate_combinations(const Tensor<type, 1>&,
155 const Tensor<type, 2>&,
156 const Tensor<type, 2>&,
157 const Tensor<type, 1>&,
158 Tensor<type, 1>&) const;
159
160 void calculate_activations(const Tensor<type, 1>&,
161 Tensor<type, 1>&) const;
162
163 void calculate_activations_derivatives(const Tensor<type, 1>&,
164 Tensor<type, 1>&,
165 Tensor<type, 1>&) const;
166
167 void calculate_activations_derivatives(const Tensor<type, 2>&,
168 Tensor<type, 2>&,
169 Tensor<type, 2>&) const;
170
171
172 // neuron layer outputs
173
174 Tensor<type, 2> calculate_outputs(const Tensor<type, 2>&);
175
176 void forward_propagate(const Tensor<type, 2>&, LayerForwardPropagation*);
177
178 void forward_propagate(const Tensor<type, 2>&, const Tensor<type, 1>, LayerForwardPropagation*);
179
180 void calculate_hidden_delta(LayerForwardPropagation*,
182 LayerBackPropagation*) const;
183
184 void calculate_hidden_delta_perceptron(PerceptronLayerForwardPropagation*,
187
188 void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*,
191
192 // Gradient
193
194 void insert_gradient(LayerBackPropagation*, const Index& , Tensor<type, 1>&) const;
195
196 void calculate_error_gradient(const Tensor<type, 2>&,
198 LayerBackPropagation*) const;
199
200 void calculate_biases_error_gradient(const Tensor<type, 2>&,
203
204 void calculate_input_weights_error_gradient(const Tensor<type, 2>&,
207
208 void calculate_recurrent_weights_error_gradient(const Tensor<type, 2>&,
211
212 // Expression methods
213
214 string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const;
215
216 string write_activation_function_expression() const;
217
218 string write_expression_python() const;
219 string write_combinations_python() const;
220 string write_activations_python() const;
221
222 // Serialization methods
223
224 void from_XML(const tinyxml2::XMLDocument&);
225
226 void write_XML(tinyxml2::XMLPrinter&) const;
227
228protected:
229
230 Index timesteps = 1;
231
234
235 Tensor<type, 1> biases;
236
237 Tensor<type, 2> input_weights;
238
240
241 Tensor<type, 2> recurrent_weights;
242
244
245 ActivationFunction activation_function = ActivationFunction::HyperbolicTangent;
246
247 Tensor<type, 1> hidden_states;
248
250
251 bool display = true;
252
253#ifdef OPENNN_CUDA
254 #include "../../opennn-cuda/opennn-cuda/recurrent_layer_cuda.h"
255#else
256};
257#endif
258
260{
262 {
263 }
264
265 explicit RecurrentLayerForwardPropagation(const Index& new_batch_samples_number, Layer* new_layer_pointer) : LayerForwardPropagation()
266 {
267 set(new_batch_samples_number, new_layer_pointer);
268 }
269
270 void set(const Index& new_batch_samples_number, Layer* new_layer_pointer)
271 {
272 layer_pointer = new_layer_pointer;
273
274 batch_samples_number = new_batch_samples_number;
275
276 const Index neurons_number = layer_pointer->get_neurons_number();
277 const Index inputs_number = layer_pointer->get_inputs_number();
278
279 previous_activations.resize(neurons_number);
280
281 current_inputs.resize(inputs_number);
282 current_combinations.resize(neurons_number);
283 current_activations_derivatives.resize(neurons_number);
284
285 combinations.resize(batch_samples_number, neurons_number);
286
287 activations.resize(batch_samples_number, neurons_number);
288
289 activations_derivatives.resize(batch_samples_number, neurons_number);
290 }
291
292 void print() const
293 {
294 }
295
296 Tensor<type, 1> previous_activations;
297
298 Tensor<type, 1> current_inputs;
299 Tensor<type, 1> current_combinations;
300 Tensor<type, 1> current_activations_derivatives;
301
302 Tensor<type, 2> combinations;
303 Tensor<type, 2> activations;
304 Tensor<type, 2> activations_derivatives;
305};
306
307
309{
311 {
312 }
313
314 explicit RecurrentLayerBackPropagation(const Index& new_batch_samples_number, Layer* new_layer_pointer)
316 {
317 set(new_batch_samples_number, new_layer_pointer);
318 }
319
320
321 void set(const Index& new_batch_samples_number, Layer* new_layer_pointer)
322 {
323 layer_pointer = new_layer_pointer;
324
325 batch_samples_number = new_batch_samples_number;
326
327 const Index neurons_number = layer_pointer->get_neurons_number();
328 const Index inputs_number = layer_pointer->get_inputs_number();
329
330 current_layer_deltas.resize(neurons_number);
331
332 biases_derivatives.resize(neurons_number);
333
334 input_weights_derivatives.resize(inputs_number * neurons_number);
335
336 recurrent_weights_derivatives.resize(neurons_number * neurons_number);
337
338 delta.resize(batch_samples_number, neurons_number);
339
340 combinations_biases_derivatives.resize(neurons_number, neurons_number);
341 combinations_weights_derivatives.resize(inputs_number*neurons_number, neurons_number);
342 combinations_recurrent_weights_derivatives.resize(neurons_number*neurons_number, neurons_number);
343 }
344
345
346 void print() const
347 {
348
349 }
350
351 Tensor<type, 1> current_layer_deltas;
352
353 Tensor<type, 1> biases_derivatives;
354
355 Tensor<type, 1> input_weights_derivatives;
356
357 Tensor<type, 1> recurrent_weights_derivatives;
358
359 Tensor<type, 2> combinations_biases_derivatives;
360 Tensor<type, 2> combinations_weights_derivatives;
361 Tensor<type, 2> combinations_recurrent_weights_derivatives;
362
363 Tensor<type, 2> delta;
364};



}

#endif

// OpenNN: Open Neural Networks Library.
// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software

// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
// ---------------------------------------------------------------------------
// NOTE(review): the text below is Doxygen cross-reference residue that a
// documentation-extraction tool appended after the end of this header; it is
// not compilable code.  It is preserved here, commented out, because it
// records member signatures and brief descriptions that were used to
// cross-check the declarations above.
//
// This abstract class represents the concept of layer of neurons in OpenNN.
// Definition: layer.h:53
// virtual Index get_inputs_number() const
// Returns the number of inputs.
// Definition: layer.cpp:153
// const Tensor< type, 2 > & get_recurrent_weights() const
// string write_activation_function() const
// void set_parameters_constant(const type &)
// string write_activations_python() const
// void set_input_weights_constant(const type &)
// const bool & get_display() const
// Index get_inputs_number() const
// Returns the number of inputs to the layer.
// void set_biases_constant(const type &)
// string write_expression(const Tensor< string, 1 > &, const Tensor< string, 1 > &) const
// Tensor< type, 2 > recurrent_weights
// This matrix containing conection strengths from a recurrent layer inputs to its neurons.
// bool display
// Display messages to screen.
// ActivationFunction
// Enumeration of the available activation functions for the recurrent layer.
// void set_activation_function(const ActivationFunction &)
// void set_recurrent_weights_constant(const type &)
// Tensor< type, 1 > biases
// const Tensor< type, 2 > & get_input_weights() const
// void set_inputs_number(const Index &)
// Index get_neurons_number() const
// Returns the size of the neurons vector.
// const RecurrentLayer::ActivationFunction & get_activation_function() const
// Returns the activation function of the layer.
// void set_parameters(const Tensor< type, 1 > &, const Index &=0)
// void set_display(const bool &)
// void set_hidden_states_constant(const type &)
// void initialize_input_weights_Glorot(const type &, const type &)
// const Tensor< type, 1 > & get_hidden_states() const
// Returns the hidden states of the layer.
// Index get_parameters_number() const
// Returns the number of parameters (biases and weights) of the layer.
// ActivationFunction activation_function
// Activation function variable.
// Tensor< type, 1 > get_biases() const
// void set_neurons_number(const Index &)
// Tensor< type, 1 > get_parameters() const
// LayerBackPropagation()
// Default constructor.
// Definition: layer.h:305
// LayerForwardPropagation()
// Default constructor.
// Definition: layer.h:285