quasi_newton_method.h
1// OpenNN: Open Neural Networks Library
2// www.opennn.net
3//
4// Q U A S I - N E W T O N M E T H O D C L A S S H E A D E R
5//
6// Artificial Intelligence Techniques SL
7// artelnics@artelnics.com
8
9#ifndef QUASINEWTONMETHOD_H
10#define QUASINEWTONMETHOD_H
11
12// System includes
13
14#include <string>
15#include <sstream>
16#include <iostream>
17#include <fstream>
18#include <algorithm>
19#include <functional>
20#include <limits>
21#include <cmath>
22#include <ctime>
23
24// OpenNN includes
25
26#include "config.h"
27
28#include "loss_index.h"
29
30#include "optimization_algorithm.h"
31#include "learning_rate_algorithm.h"
32
33// Eigen Includes
34
35#include "../eigen/unsupported/Eigen/KroneckerProduct"
36
37using Eigen::MatrixXd;
38
39namespace OpenNN
40{
41
42struct QuasiNewtonMehtodData;
43
46
51
53{
54
55public:
56
57 // Enumerations
58
60
61 enum class InverseHessianApproximationMethod{DFP, BFGS};
62
63 // Constructors
64
65 explicit QuasiNewtonMethod();
66
68
69 // Destructor
70
71 virtual ~QuasiNewtonMethod();
72
73 // Get methods
74
77
80
81 const Index& get_epochs_number() const;
82
83 // Stopping criteria
84
85 const type& get_minimum_loss_decrease() const;
86 const type& get_loss_goal() const;
87
88 const Index& get_maximum_selection_failures() const;
89
90 const Index& get_maximum_epochs_number() const;
91 const type& get_maximum_time() const;
92
93 // Set methods
94
96
99
100 void set_display(const bool&);
101
102 void set_default();
103
104 // Stopping criteria
105
106
107
108 void set_minimum_loss_decrease(const type&);
109 void set_loss_goal(const type&);
110
111 void set_maximum_selection_failures(const Index&);
112
113 void set_maximum_epochs_number(const Index&);
114 void set_maximum_time(const type&);
115
116 // Training methods
117
119
121
122 void initialize_inverse_hessian_approximation(QuasiNewtonMehtodData&) const;
124
125 const Tensor<type, 2> kronecker_product(Tensor<type, 2>&, Tensor<type, 2>&) const;
126 const Tensor<type, 2> kronecker_product(Tensor<type, 1>&, Tensor<type, 1>&) const;
127
129 const DataSetBatch& batch,
130 NeuralNetworkForwardPropagation& forward_propagation,
131 LossIndexBackPropagation& back_propagation,
132 QuasiNewtonMehtodData& optimization_data);
133
135
136 string write_optimization_algorithm_type() const;
137
138 // Serialization methods
139
140 void from_XML(const tinyxml2::XMLDocument&);
141
142 void write_XML(tinyxml2::XMLPrinter&) const;
143
144 Tensor<string, 2> to_string_matrix() const;
145
146private:
147
150
152
154
156
157 type first_learning_rate = static_cast<type>(0.01);
158
159 // Stopping criteria
160
162
164
166
168
171
173
175
177
179
181};
182
183
185{
187
189 {
190 }
191
192 explicit QuasiNewtonMehtodData(QuasiNewtonMethod* new_quasi_newton_method_pointer)
193 {
194 set(new_quasi_newton_method_pointer);
195 }
196
197 virtual ~QuasiNewtonMehtodData() {}
198
199 void set(QuasiNewtonMethod* new_quasi_newton_method_pointer)
200 {
201 quasi_newton_method_pointer = new_quasi_newton_method_pointer;
202
203 LossIndex* loss_index_pointer = quasi_newton_method_pointer->get_loss_index_pointer();
204
205 NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();
206
207 const Index parameters_number = neural_network_pointer->get_parameters_number();
208
209 // Neural network data
210
211 old_parameters.resize(parameters_number);
212
213 parameters_difference.resize(parameters_number);
214
215 potential_parameters.resize(parameters_number);
216 parameters_increment.resize(parameters_number);
217
218 // Loss index data
219
220 old_gradient.resize(parameters_number);
221 old_gradient.setZero();
222
223 gradient_difference.resize(parameters_number);
224
225 inverse_hessian.resize(parameters_number, parameters_number);
226 inverse_hessian.setZero();
227
228 old_inverse_hessian.resize(parameters_number, parameters_number);
229 old_inverse_hessian.setZero();
230
231 // Optimization algorithm data
232
233 training_direction.resize(parameters_number);
234
235 old_inverse_hessian_dot_gradient_difference.resize(parameters_number);
236 }
237
238 void print() const
239 {
240 cout << "Training Direction:" << endl;
241 cout << training_direction << endl;
242
243 cout << "Learning rate:" << endl;
244 cout << learning_rate << endl;
245 }
246
247 QuasiNewtonMethod* quasi_newton_method_pointer = nullptr;
248
249 // Neural network data
250
251 Tensor<type, 1> old_parameters;
252 Tensor<type, 1> parameters_difference;
253
254 Tensor<type, 1> parameters_increment;
255
256 // Loss index data
257
258// type old_loss = 0;
259
260 Tensor<type, 1> old_gradient;
261 Tensor<type, 1> gradient_difference;
262
263 Tensor<type, 2> inverse_hessian;
264 Tensor<type, 2> old_inverse_hessian;
265
266 Tensor<type, 1> old_inverse_hessian_dot_gradient_difference;
267
268 // Optimization algorithm data
269
270 Index epoch = 0;
271
272 Tensor<type, 0> training_slope;
273
274 type learning_rate = type(0);
275 type old_learning_rate = type(0);
276};
277
278}
279
280#endif
281
282
283// OpenNN: Open Neural Networks Library.
284// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
285//
286// This library is free software; you can redistribute it and/or
287// modify it under the terms of the GNU Lesser General Public
288// License as published by the Free Software Foundation; either
289// version 2.1 of the License, or any later version.
290//
291// This library is distributed in the hope that it will be useful,
292// but WITHOUT ANY WARRANTY; without even the implied warranty of
293// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
294// Lesser General Public License for more details.
295
296// You should have received a copy of the GNU Lesser General Public
297// License along with this library; if not, write to the Free Software
298// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
A learning rate that is adjusted according to an algorithm during training to minimize training time.
This abstract class represents the concept of loss index, composed of an error term and a regularization term.
Definition: loss_index.h:48
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated to the error term.
Definition: loss_index.h:70
Index get_parameters_number() const
void set_maximum_selection_failures(const Index &)
void update_parameters(const DataSetBatch &batch, NeuralNetworkForwardPropagation &forward_propagation, LossIndexBackPropagation &back_propagation, QuasiNewtonMehtodData &optimization_data)
QuasiNewtonMethod::update_parameters.
const InverseHessianApproximationMethod & get_inverse_hessian_approximation_method() const
Returns the method for approximating the inverse hessian matrix to be used when training.
void set_loss_index_pointer(LossIndex *)
const type & get_maximum_time() const
Returns the maximum training time.
const type & get_loss_goal() const
void from_XML(const tinyxml2::XMLDocument &)
void calculate_DFP_inverse_hessian(QuasiNewtonMehtodData &) const
void set_default()
Sets the members of the optimization algorithm object to their default values.
InverseHessianApproximationMethod inverse_hessian_approximation_method
Variable containing the actual method used to obtain a suitable learning rate.
const Index & get_maximum_epochs_number() const
Returns the maximum number of epochs for training.
const Tensor< type, 2 > kronecker_product(Tensor< type, 2 > &, Tensor< type, 2 > &) const
Tensor< string, 2 > to_string_matrix() const
Writes as matrix of strings the most representative attributes.
type minimum_loss_decrease
Minimum loss improvement between two successive epochs. It is used as a stopping criterion.
LearningRateAlgorithm * get_learning_rate_algorithm_pointer()
Returns a pointer to the learning rate algorithm object inside the quasi-Newton method object.
const LearningRateAlgorithm & get_learning_rate_algorithm() const
Returns a constant reference to the learning rate algorithm object inside the quasi-Newton method object.
void set_maximum_time(const type &)
void set_inverse_hessian_approximation_method(const InverseHessianApproximationMethod &)
void calculate_BFGS_inverse_hessian(QuasiNewtonMehtodData &) const
InverseHessianApproximationMethod
Enumeration of the available training operators for obtaining the approximation to the inverse Hessian.
LearningRateAlgorithm learning_rate_algorithm
type maximum_time
Maximum training time. It is used as a stopping criterion.
void set_maximum_epochs_number(const Index &)
void set_minimum_loss_decrease(const type &)
void calculate_inverse_hessian_approximation(QuasiNewtonMehtodData &) const
string write_inverse_hessian_approximation_method() const
Returns the name of the method for the approximation of the inverse hessian.
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
Index maximum_epochs_number
Maximum number of epochs to perform training. It is used as a stopping criterion.
void write_XML(tinyxml2::XMLPrinter &) const
const Index & get_maximum_selection_failures() const
Returns the maximum number of selection error increases during the training process.
const type & get_minimum_loss_decrease() const
Returns the minimum loss improvement during training.
QuasiNewtonMehtodData()
Default constructor.
This structure contains the optimization algorithm results.