// stochastic_gradient_descent.h
// OpenNN: Open Neural Networks Library
// www.opennn.net
//
// S T O C H A S T I C   G R A D I E N T   D E S C E N T   C L A S S   H E A D E R
//
// Artificial Intelligence Techniques SL
// artelnics@artelnics.com

#ifndef STOCHASTICGRADIENTDESCENT_H
#define STOCHASTICGRADIENTDESCENT_H

// System includes

#include <string>
#include <sstream>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <functional>
#include <limits>
#include <cmath>
#include <ctime>

// OpenNN includes

#include "config.h"

#include "loss_index.h"
#include "optimization_algorithm.h"
31namespace OpenNN
32{
33
34struct StochasticGradientDescentData;
35
37
42
44{
45
46public:
47
48 // Constructors
49
50 explicit StochasticGradientDescent();
51
53
54 // Destructor
55
57
58 //Training operators
59
60 const type& get_initial_learning_rate() const;
61 const type& get_initial_decay() const;
62 const type& get_momentum() const;
63 const bool& get_nesterov() const;
64
65 // Stopping criteria
66
67 const type& get_loss_goal() const;
68 const type& get_maximum_time() const;
69
70 // Set methods
71
73
74 void set_default();
75
76 void set_batch_samples_number(const Index& new_batch_samples_number)
77 {
78 batch_samples_number = new_batch_samples_number;
79 }
80
81 // Get methods
82
83 Index get_batch_samples_number() const;
84
85 //Training operators
86
87 void set_initial_learning_rate(const type&);
88 void set_initial_decay(const type&);
89 void set_momentum(const type&);
90 void set_nesterov(const bool&);
91
92 void set_maximum_epochs_number(const Index&);
93
94 // Stopping criteria
95
96 void set_loss_goal(const type&);
97 void set_maximum_time(const type&);
98
99 // Training methods
100
101 void update_parameters(LossIndexBackPropagation& back_propagation,
102 StochasticGradientDescentData& optimization_data);
103
105
106 string write_optimization_algorithm_type() const;
107
108 // Serialization methods
109
110 Tensor<string, 2> to_string_matrix() const;
111
112 void from_XML(const tinyxml2::XMLDocument&);
113
114 void write_XML(tinyxml2::XMLPrinter&) const;
115
116private:
117
118 // Training operators
119
121
123
125
127
129
131
133
135
137
139
140 // Stopping criteria
141
143
144 type training_loss_goal = type(0);
145
147
148 Index maximum_selection_failures = numeric_limits<Index>::max();
149
151
153
155
156 type maximum_time = type(3600);
157
158#ifdef OPENNN_CUDA
159 #include "../../opennn-cuda/opennn-cuda/stochastic_gradient_descent_cuda.h"
160#endif
161
162};
163
164
166{
168
170 {
171 }
172
173 explicit StochasticGradientDescentData(StochasticGradientDescent* new_stochastic_gradient_descent_pointer)
174 {
175 set(new_stochastic_gradient_descent_pointer);
176 }
177
178 virtual ~StochasticGradientDescentData() {}
179
180 void set(StochasticGradientDescent* new_stochastic_gradient_descent_pointer)
181 {
182 stochastic_gradient_descent_pointer = new_stochastic_gradient_descent_pointer;
183
184 LossIndex* loss_index_pointer = stochastic_gradient_descent_pointer->get_loss_index_pointer();
185
186 NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();
187
188 const Index parameters_number = neural_network_pointer->get_parameters_number();
189
190 parameters_increment.resize(parameters_number);
191 nesterov_increment.resize(parameters_number);
192 last_parameters_increment.resize(parameters_number);
193
194 parameters_increment.setZero();
195 nesterov_increment.setZero();
196 last_parameters_increment.setZero();
197 }
198
199 StochasticGradientDescent* stochastic_gradient_descent_pointer = nullptr;
200
201 Index iteration = 0;
202
203 Tensor<type, 1> parameters_increment;
204 Tensor<type, 1> nesterov_increment;
205 Tensor<type, 1> last_parameters_increment;
206};
207
208}
209
210#endif
/* NOTE(review): Doxygen extraction residue (tooltip text from the generated
   documentation page) that was appended after the include guard. Fenced off
   as a comment so the header stays well-formed; kept verbatim for reference.

This abstract class represents the concept of loss index composed of an error term and a regularizati...
Definition: loss_index.h:48
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated to the error term.
Definition: loss_index.h:70
Index get_parameters_number() const
This concrete class represents the stochastic gradient descent optimization algorithm[1] for a loss i...
const type & get_maximum_time() const
Returns the maximum training time.
type momentum
Parameter that accelerates SGD in the relevant direction and dampens oscillations.
void from_XML(const tinyxml2::XMLDocument &)
void set_default()
Sets the members of the optimization algorithm object to their default values.
type initial_learning_rate
Initial learning rate.
Tensor< string, 2 > to_string_matrix() const
Writes as matrix of strings the most representative atributes.
const type & get_momentum() const
Returns the momentum.
const bool & get_nesterov() const
Returns true if nesterov is active, and false otherwise.
type maximum_time
Maximum training time. It is used as a stopping criterion.
type initial_decay
Learning rate decay over each update.
void update_parameters(LossIndexBackPropagation &back_propagation, StochasticGradientDescentData &optimization_data)
Set hardware to use. Default: Multi-core.
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
Index maximum_epochs_number
Maximum epochs number.
void write_XML(tinyxml2::XMLPrinter &) const
bool nesterov
Boolean. Whether to apply Nesterov momentum.
Index batch_samples_number
Number of samples per training batch.
Index maximum_selection_failures
Maximum selection error allowed.
const type & get_initial_learning_rate() const
Returns the initial learning rate.
const type & get_initial_decay() const
Returns the initial decay.
This structure contains the optimization algorithm results.
*/