9#ifndef QUASINEWTONMETHOD_H
10#define QUASINEWTONMETHOD_H
28#include "loss_index.h"
30#include "optimization_algorithm.h"
31#include "learning_rate_algorithm.h"
35#include "../eigen/unsupported/Eigen/KroneckerProduct"
42struct QuasiNewtonMehtodData;
81 const Index& get_epochs_number()
const;
125 const Tensor<type, 2>
kronecker_product(Tensor<type, 2>&, Tensor<type, 2>&)
const;
126 const Tensor<type, 2>
kronecker_product(Tensor<type, 1>&, Tensor<type, 1>&)
const;
136 string write_optimization_algorithm_type()
const;
157 type first_learning_rate =
static_cast<type
>(0.01);
194 set(new_quasi_newton_method_pointer);
197 virtual ~QuasiNewtonMehtodData() {}
199 void set(QuasiNewtonMethod* new_quasi_newton_method_pointer)
201 quasi_newton_method_pointer = new_quasi_newton_method_pointer;
211 old_parameters.resize(parameters_number);
213 parameters_difference.resize(parameters_number);
215 potential_parameters.resize(parameters_number);
216 parameters_increment.resize(parameters_number);
220 old_gradient.resize(parameters_number);
221 old_gradient.setZero();
223 gradient_difference.resize(parameters_number);
225 inverse_hessian.resize(parameters_number, parameters_number);
226 inverse_hessian.setZero();
228 old_inverse_hessian.resize(parameters_number, parameters_number);
229 old_inverse_hessian.setZero();
233 training_direction.resize(parameters_number);
235 old_inverse_hessian_dot_gradient_difference.resize(parameters_number);
240 cout <<
"Training Direction:" << endl;
241 cout << training_direction << endl;
243 cout <<
"Learning rate:" << endl;
244 cout << learning_rate << endl;
247 QuasiNewtonMethod* quasi_newton_method_pointer =
nullptr;
251 Tensor<type, 1> old_parameters;
252 Tensor<type, 1> parameters_difference;
254 Tensor<type, 1> parameters_increment;
260 Tensor<type, 1> old_gradient;
261 Tensor<type, 1> gradient_difference;
263 Tensor<type, 2> inverse_hessian;
264 Tensor<type, 2> old_inverse_hessian;
266 Tensor<type, 1> old_inverse_hessian_dot_gradient_difference;
272 Tensor<type, 0> training_slope;
274 type learning_rate = type(0);
275 type old_learning_rate = type(0);
A learning rate that is adjusted according to an algorithm during training to minimize training time.
This abstract class represents the concept of loss index composed of an error term and a regularizati...
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated with the error term.
Index get_parameters_number() const
LossIndex * get_loss_index_pointer() const
TrainingResults perform_training()
void set_maximum_selection_failures(const Index &)
void update_parameters(const DataSetBatch &batch, NeuralNetworkForwardPropagation &forward_propagation, LossIndexBackPropagation &back_propagation, QuasiNewtonMehtodData &optimization_data)
QuasiNewtonMethod::update_parameters.
const InverseHessianApproximationMethod & get_inverse_hessian_approximation_method() const
Returns the method for approximating the inverse hessian matrix to be used when training.
void set_loss_index_pointer(LossIndex *)
const type & get_maximum_time() const
Returns the maximum training time.
const type & get_loss_goal() const
void from_XML(const tinyxml2::XMLDocument &)
void calculate_DFP_inverse_hessian(QuasiNewtonMehtodData &) const
void set_default()
Sets the members of the optimization algorithm object to their default values.
InverseHessianApproximationMethod inverse_hessian_approximation_method
Variable containing the actual method used to approximate the inverse hessian matrix.
const Index & get_maximum_epochs_number() const
Returns the maximum number of epochs for training.
const Tensor< type, 2 > kronecker_product(Tensor< type, 2 > &, Tensor< type, 2 > &) const
Tensor< string, 2 > to_string_matrix() const
Writes the most representative attributes as a matrix of strings.
type minimum_loss_decrease
Minimum loss improvement between two successive epochs. It is used as a stopping criterion.
LearningRateAlgorithm * get_learning_rate_algorithm_pointer()
Returns a pointer to the learning rate algorithm object inside the quasi-Newton method object.
const LearningRateAlgorithm & get_learning_rate_algorithm() const
Returns a constant reference to the learning rate algorithm object inside the quasi-Newton method obj...
void set_maximum_time(const type &)
void set_inverse_hessian_approximation_method(const InverseHessianApproximationMethod &)
void calculate_BFGS_inverse_hessian(QuasiNewtonMehtodData &) const
InverseHessianApproximationMethod
Enumeration of the available training operators for obtaining the approximation to the inverse hessia...
LearningRateAlgorithm learning_rate_algorithm
void set_loss_goal(const type &)
type maximum_time
Maximum training time. It is used as a stopping criterion.
void set_maximum_epochs_number(const Index &)
void set_minimum_loss_decrease(const type &)
void calculate_inverse_hessian_approximation(QuasiNewtonMehtodData &) const
string write_inverse_hessian_approximation_method() const
Returns the name of the method for the approximation of the inverse hessian.
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
Index maximum_epochs_number
Maximum number of training epochs to perform. It is used as a stopping criterion.
void set_display(const bool &)
void write_XML(tinyxml2::XMLPrinter &) const
virtual ~QuasiNewtonMethod()
const Index & get_maximum_selection_failures() const
Returns the maximum number of selection error increases during the training process.
Index maximum_selection_failures
const type & get_minimum_loss_decrease() const
Returns the minimum loss improvement during training.
QuasiNewtonMehtodData()
Default constructor.
This structure contains the optimization algorithm results.