quasi_newton_method.h
1// OpenNN: Open Neural Networks Library
2// www.opennn.net
3//
4// Q U A S I - N E W T O N M E T H O D C L A S S H E A D E R
5//
6// Artificial Intelligence Techniques SL
7// artelnics@artelnics.com
8
9#ifndef QUASINEWTONMETHOD_H
10#define QUASINEWTONMETHOD_H
11
12// System includes
13
14#include <string>
15#include <sstream>
16#include <iostream>
17#include <fstream>
18#include <algorithm>
19#include <functional>
20#include <limits>
21#include <cmath>
22#include <ctime>
23
24// OpenNN includes
25
26#include "config.h"
27
28#include "loss_index.h"
29
30#include "optimization_algorithm.h"
31#include "learning_rate_algorithm.h"
32
33// Eigen Includes
34
35#include "../eigen/unsupported/Eigen/KroneckerProduct"
36
37using Eigen::MatrixXd;
38
39namespace OpenNN
40{
41
42struct QuasiNewtonMehtodData;
43
46
51
53{
54
55public:
56
57 // Enumerations
58
60
61 enum class InverseHessianApproximationMethod{DFP, BFGS};
62
63 // Constructors
64
65 explicit QuasiNewtonMethod();
66
68
69 // Destructor
70
71 virtual ~QuasiNewtonMethod();
72
73 // Get methods
74
77
80
81 const Index& get_epochs_number() const;
82
83 // Stopping criteria
84
85 const type& get_minimum_loss_decrease() const;
86 const type& get_loss_goal() const;
87
88 const Index& get_maximum_selection_failures() const;
89
90 const Index& get_maximum_epochs_number() const;
91 const type& get_maximum_time() const;
92
93 // Set methods
94
96
99
100 void set_display(const bool&);
101
102 void set_default();
103
104 // Stopping criteria
105
106
107
108 void set_minimum_loss_decrease(const type&);
109 void set_loss_goal(const type&);
110
111 void set_maximum_selection_failures(const Index&);
112
113 void set_maximum_epochs_number(const Index&);
114 void set_maximum_time(const type&);
115
116 // Training methods
117
119
121
122 void initialize_inverse_hessian_approximation(QuasiNewtonMehtodData&) const;
124
125 const Tensor<type, 2> kronecker_product(Tensor<type, 2>&, Tensor<type, 2>&) const;
126 const Tensor<type, 2> kronecker_product(Tensor<type, 1>&, Tensor<type, 1>&) const;
127
129 const DataSetBatch& batch,
130 NeuralNetworkForwardPropagation& forward_propagation,
131 LossIndexBackPropagation& back_propagation,
132 QuasiNewtonMehtodData& optimization_data);
133
135
136 string write_optimization_algorithm_type() const;
137
138 // Serialization methods
139
140 void from_XML(const tinyxml2::XMLDocument&);
141
142 void write_XML(tinyxml2::XMLPrinter&) const;
143
144 Tensor<string, 2> to_string_matrix() const;
145
146private:
147
150
152
154
156
157 type first_learning_rate = static_cast<type>(0.01);
158
159 // Stopping criteria
160
162
164
166
168
171
173
175
177
179
181};
182
183
185{
187
189 {
190 }
191
192 explicit QuasiNewtonMehtodData(QuasiNewtonMethod* new_quasi_newton_method_pointer)
193 {
194 set(new_quasi_newton_method_pointer);
195 }
196
197 virtual ~QuasiNewtonMehtodData() {}
198
199 void set(QuasiNewtonMethod* new_quasi_newton_method_pointer)
200 {
201 quasi_newton_method_pointer = new_quasi_newton_method_pointer;
202
203 LossIndex* loss_index_pointer = quasi_newton_method_pointer->get_loss_index_pointer();
204
205 NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();
206
207 const Index parameters_number = neural_network_pointer->get_parameters_number();
208
209 // Neural network data
210
211 old_parameters.resize(parameters_number);
212
213 parameters_difference.resize(parameters_number);
214
215 potential_parameters.resize(parameters_number);
216 parameters_increment.resize(parameters_number);
217
218 // Loss index data
219
220 old_gradient.resize(parameters_number);
221 old_gradient.setZero();
222
223 gradient_difference.resize(parameters_number);
224
225 inverse_hessian.resize(parameters_number, parameters_number);
226 inverse_hessian.setZero();
227
228 old_inverse_hessian.resize(parameters_number, parameters_number);
229 old_inverse_hessian.setZero();
230
231 // Optimization algorithm data
232
233 training_direction.resize(parameters_number);
234
235 old_inverse_hessian_dot_gradient_difference.resize(parameters_number);
236 }
237
238 void print() const
239 {
240 cout << "Training Direction:" << endl;
241 cout << training_direction << endl;
242
243 cout << "Learning rate:" << endl;
244 cout << learning_rate << endl;
245 }
246
247 QuasiNewtonMethod* quasi_newton_method_pointer = nullptr;
248
249 // Neural network data
250
251 Tensor<type, 1> old_parameters;
252 Tensor<type, 1> parameters_difference;
253
254 Tensor<type, 1> parameters_increment;
255
256 // Loss index data
257
258// type old_loss = 0;
259
260 Tensor<type, 1> old_gradient;
261 Tensor<type, 1> gradient_difference;
262
263 Tensor<type, 2> inverse_hessian;
264 Tensor<type, 2> old_inverse_hessian;
265
266 Tensor<type, 1> old_inverse_hessian_dot_gradient_difference;
267
268 // Optimization algorithm data
269
270 Index epoch = 0;
271
272 Tensor<type, 0> training_slope;
273
274 type learning_rate = type(0);
275 type old_learning_rate = type(0);
276};
277
278}
279
280#endif
281
282
283// OpenNN: Open Neural Networks Library.
284// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
285//
286// This library is free software; you can redistribute it and/or
287// modify it under the terms of the GNU Lesser General Public
288// License as published by the Free Software Foundation; either
289// version 2.1 of the License, or any later version.
290//
291// This library is distributed in the hope that it will be useful,
292// but WITHOUT ANY WARRANTY; without even the implied warranty of
293// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
294// Lesser General Public License for more details.
295
296// You should have received a copy of the GNU Lesser General Public
297// License along with this library; if not, write to the Free Software
298// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
A learning rate that is adjusted according to an algorithm during training to minimize training time.
This abstract class represents the concept of loss index, composed of an error term and a regularization term.
Definition: loss_index.h:48
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated to the error term.
Definition: loss_index.h:70
Index get_parameters_number() const
void set_maximum_selection_failures(const Index &)
void update_parameters(const DataSetBatch &batch, NeuralNetworkForwardPropagation &forward_propagation, LossIndexBackPropagation &back_propagation, QuasiNewtonMehtodData &optimization_data)
QuasiNewtonMethod::update_parameters.
const InverseHessianApproximationMethod & get_inverse_hessian_approximation_method() const
Returns the method for approximating the inverse hessian matrix to be used when training.
void set_loss_index_pointer(LossIndex *)
const type & get_maximum_time() const
Returns the maximum training time.
const type & get_loss_goal() const
void from_XML(const tinyxml2::XMLDocument &)
void calculate_DFP_inverse_hessian(QuasiNewtonMehtodData &) const
void set_default()
Sets the members of the optimization algorithm object to their default values.
InverseHessianApproximationMethod inverse_hessian_approximation_method
Variable containing the actual method used to obtain a suitable learning rate.
const Index & get_maximum_epochs_number() const
Returns the maximum number of epochs for training.
const Tensor< type, 2 > kronecker_product(Tensor< type, 2 > &, Tensor< type, 2 > &) const
Tensor< string, 2 > to_string_matrix() const
Writes as matrix of strings the most representative attributes.
type minimum_loss_decrease
Minimum loss improvement between two successive epochs. It is used as a stopping criterion.
LearningRateAlgorithm * get_learning_rate_algorithm_pointer()
Returns a pointer to the learning rate algorithm object inside the quasi-Newton method object.
const LearningRateAlgorithm & get_learning_rate_algorithm() const
Returns a constant reference to the learning rate algorithm object inside the quasi-Newton method object.
void set_maximum_time(const type &)
void set_inverse_hessian_approximation_method(const InverseHessianApproximationMethod &)
void calculate_BFGS_inverse_hessian(QuasiNewtonMehtodData &) const
InverseHessianApproximationMethod
Enumeration of the available training operators for obtaining the approximation to the inverse Hessian.
LearningRateAlgorithm learning_rate_algorithm
type maximum_time
Maximum training time. It is used as a stopping criterion.
void set_maximum_epochs_number(const Index &)
void set_minimum_loss_decrease(const type &)
void calculate_inverse_hessian_approximation(QuasiNewtonMehtodData &) const
string write_inverse_hessian_approximation_method() const
Returns the name of the method for the approximation of the inverse hessian.
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
Index maximum_epochs_number
Maximum number of epochs to perform training. It is used as a stopping criterion.
void write_XML(tinyxml2::XMLPrinter &) const
const Index & get_maximum_selection_failures() const
Returns the maximum number of selection error increases during the training process.
const type & get_minimum_loss_decrease() const
Returns the minimum loss improvement during training.
QuasiNewtonMehtodData()
Default constructor.
This structure contains the optimization algorithm results.